[SPIRV][AMDGPU][clang][CodeGen][opt] Add late-resolved feature identifying predicates (#134016)

This change adds two builtins for AMDGPU:

- `__builtin_amdgcn_processor_is`, which is similar in observable
behaviour with `__builtin_cpu_is`, except that it is never "evaluated"
at run time;
- `__builtin_amdgcn_is_invocable`, which is behaviourally similar with
`__has_builtin`, except that it is not a macro (i.e. not evaluated at
preprocessing time).

Neither of these are `constexpr`, even though when compiling for
concrete (i.e. `gfxXXX` / `gfxXXX-generic`) targets they get evaluated
in Clang, so they shouldn't tear the AST too badly / at all for
multi-pass compilation cases like HIP. They can only be used in specific
contexts (as args to control structures).

The motivation for adding these is two-fold:

- as a nice to have, it provides an AST-visible way to incorporate
architecture specific code, rather than having to rely on macros and the
preprocessor, which burn in the choice quite early;
- as a must have, it allows featureful AMDGCN flavoured SPIR-V to be
produced, where target specific capability is guarded and chosen or
discarded when finalising compilation for a concrete target; this is
built atop the Speciali\ation Constant concept which is described in the
SPIR-V specification under section [2.12
Specialization](https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#_specialization_2)

I've tried to keep the overall footprint of the change small. The
changes to Sema are a bit unpleasant, but there was a strong desire to
have Clang validate these, and to constrain their uses, and this was the
most compact solution I could come up with (suggestions welcome).

---------

Co-authored-by: Juan Manuel Martinez Caamaño <jmartinezcaamao@gmail.com>
Co-authored-by: Voicu <avoicu@amd.com>
This commit is contained in:
Alex Voicu 2026-03-30 23:02:26 +01:00 committed by GitHub
parent 5b00cdf8e1
commit 18e6958903
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
47 changed files with 2099 additions and 161 deletions

View File

@ -5558,6 +5558,120 @@ If no address spaces names are provided, all address spaces are fenced.
__builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup", "local")
__builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup", "local", "global")
__builtin_amdgcn_processor_is and __builtin_amdgcn_is_invocable
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
``__builtin_amdgcn_processor_is`` and ``__builtin_amdgcn_is_invocable`` provide
a functional mechanism for programatically querying:
* the identity of the current target processor;
* the capability of the current target processor to invoke a particular builtin.
**Syntax**:
.. code-block:: c
__amdgpu_feature_predicate_t __builtin_amdgcn_processor_is(const char*);
__amdgpu_feature_predicate_t __builtin_amdgcn_is_invocable(builtin_name);
**Example of use**:
.. code-block:: c++
if (__builtin_amdgcn_processor_is("gfx1201") ||
__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_sleep_var))
__builtin_amdgcn_s_sleep_var(x);
if (!__builtin_amdgcn_processor_is("gfx906"))
__builtin_amdgcn_s_wait_event_export_ready();
else if (__builtin_amdgcn_processor_is("gfx1010") ||
__builtin_amdgcn_processor_is("gfx1101"))
__builtin_amdgcn_s_ttracedata_imm(1);
while (__builtin_amdgcn_processor_is("gfx1101")) *p += x;
do {
break;
} while (__builtin_amdgcn_processor_is("gfx1010"));
for (; __builtin_amdgcn_processor_is("gfx1201"); ++*p) break;
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_wait_event_export_ready))
__builtin_amdgcn_s_wait_event_export_ready();
else if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_ttracedata_imm))
__builtin_amdgcn_s_ttracedata_imm(1);
do {
break;
} while (
__builtin_amdgcn_is_invocable(__builtin_amdgcn_global_load_tr_b64_i32));
for (; __builtin_amdgcn_is_invocable(__builtin_amdgcn_permlane64); ++*p)
break;
**Description**:
The builtins return a value of type ``__amdgpu_feature_predicate_t``, which is a
target specific type that behaves as if its C++ definition was the following:
.. code-block:: c++
struct __amdgpu_feature_predicate_t {
__amdgpu_feature_predicate_t() = delete;
__amdgpu_feature_predicate_t(const __amdgpu_feature_predicate_t&) = delete;
__amdgpu_feature_predicate_t(__amdgpu_feature_predicate_t&&) = delete;
explicit
operator bool() const noexcept;
};
The builtins can be used in C as well, wherein the
``__amdgpu_feature_predicate_t`` type behaves as an opaque, forward declared
type with conditional automated conversion to ``_Bool`` when used as the
predicate argument to a control structure:
.. code-block:: c
struct __amdgpu_feature_predicate_t ret(); // Error
void arg(struct __amdgpu_feature_predicate_t); // Error
void local() {
struct __amdgpu_feature_predicate_t x; // Error
struct __amdgpu_feature_predicate_t y =
__builtin_amdgcn_processor_is("gfx900"); // Error
}
void valid_use() {
_Bool x = (_Bool)__builtin_amdgcn_processor_is("gfx900"); // OK
if (__builtin_amdgcn_processor_is("gfx900")) // Implicit cast to _Bool
return;
for (; __builtin_amdgcn_processor_is("gfx900");) // Implicit cast to _Bool
break;
while (__builtin_amdgcn_processor_is("gfx900")) // Implicit cast to _Bool
break;
do {
break;
} while (__builtin_amdgcn_processor_is("gfx900")); // Implicit cast to _Bool
__builtin_amdgcn_processor_is("gfx900") ? x : !x;
}
The boolean interpretation of the predicate values returned by the builtins:
* indicates whether the current target matches the argument; the argument MUST
be a string literal and a valid AMDGPU target
* indicates whether the builtin function passed as the argument can be invoked
by the current target; the argument MUST be either a generic or AMDGPU
specific builtin name
When invoked while compiling for a concrete target, the builtins are evaluated
early by Clang, and never produce any CodeGen effects / have no observable
side-effects in IR. Conversely, when compiling for AMDGCN flavoured SPIR-v,
which is an abstract target, a series of specialization constants are implicitly
created, in correspondence with the predicates. These predicates get resolved
when finalizing the compilation process for a concrete target, and shall reflect
the latter's identity and features. Thus, it is possible to author high-level
code, in e.g. HIP, that is target adaptive in a dynamic fashion, contrary to
macro based mechanisms.
__builtin_amdgcn_ballot_w{32,64}
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

View File

@ -458,7 +458,10 @@ Target Specific Changes
AMDGPU Support
^^^^^^^^^^^^^^
- Introduced a new target specific builtin ``__builtin_amdgcn_processor_is``,
a late / deferred query for the current target processor.
- Introduced a new target specific builtin ``__builtin_amdgcn_is_invocable``,
a late / deferred query for the availability of target specific builtins.
- Initial support for gfx1310
NVPTX Support

View File

@ -20,11 +20,19 @@
AMDGPU_TYPE(Name, Id, SingletonId, Width, Align)
#endif
#ifndef AMDGPU_FEATURE_PREDICATE_TYPE
#define AMDGPU_FEATURE_PREDICATE_TYPE(Name, Id, SingletonId, Width, Align) \
AMDGPU_TYPE(Name, Id, SingletonId, Width, Align)
#endif
AMDGPU_OPAQUE_PTR_TYPE("__amdgpu_buffer_rsrc_t", AMDGPUBufferRsrc, AMDGPUBufferRsrcTy, 128, 128, 8)
AMDGPU_OPAQUE_PTR_TYPE("__amdgpu_texture_t", AMDGPUTexture, AMDGPUTextureTy, 256, 256, 0)
AMDGPU_NAMED_BARRIER_TYPE("__amdgpu_named_workgroup_barrier_t", AMDGPUNamedWorkgroupBarrier, AMDGPUNamedWorkgroupBarrierTy, 128, 32, 0)
AMDGPU_FEATURE_PREDICATE_TYPE("__amdgpu_feature_predicate_t", AMDGPUFeaturePredicate, AMDGPUFeaturePredicateTy, 1, 1)
#undef AMDGPU_TYPE
#undef AMDGPU_OPAQUE_PTR_TYPE
#undef AMDGPU_NAMED_BARRIER_TYPE
#undef AMDGPU_FEATURE_PREDICATE_TYPE

View File

@ -34,6 +34,7 @@
// Q -> target builtin type, followed by a character to distinguish the builtin type
// Qa -> AArch64 svcount_t builtin type.
// Qb -> AMDGPU __amdgpu_buffer_rsrc_t builtin type.
// Qc -> AMDGPU __amdgpu_feature_predicate_t builtin type.
// Qt -> AMDGPU __amdgpu_texture_t builtin type.
// E -> ext_vector, followed by the number of elements and the base type.
// X -> _Complex, followed by the base type.

View File

@ -523,6 +523,11 @@ def __builtin_amdgcn_endpgm : AMDGPUBuiltin<"void()", [NoReturn]>;
def __builtin_amdgcn_get_fpenv : AMDGPUBuiltin<"uint64_t()">;
def __builtin_amdgcn_set_fpenv : AMDGPUBuiltin<"void(uint64_t)">;
// These are special FE only builtins intended for forwarding the requirements
// to the ME.
def __builtin_amdgcn_processor_is : AMDGPUBuiltin<"__amdgpu_feature_predicate_t(char const *)", [NoThrow, Const, CustomTypeChecking, UnevaluatedArguments]>;
def __builtin_amdgcn_is_invocable : AMDGPUBuiltin<"__amdgpu_feature_predicate_t()", [NoThrow, Const, CustomTypeChecking, UnevaluatedArguments]>;
//===----------------------------------------------------------------------===//
// Wave Reduction builtins.

View File

@ -1927,3 +1927,6 @@ def TrivialAutoVarInit : DiagGroup<"trivial-auto-var-init">;
// A warning for options that enable a feature that is not yet complete
def ExperimentalOption : DiagGroup<"experimental-option">;
// Warnings about unguarded usages of AMDGPU target specific constructs
def UnguardedBuiltinUsageAMDGPU : DiagGroup<"amdgpu-unguarded-builtin-usage">;

View File

@ -14117,6 +14117,47 @@ def note_acc_reduction_combiner_forming
// AMDGCN builtins diagnostics
def err_amdgcn_load_lds_size_invalid_value : Error<"invalid size value">;
def note_amdgcn_load_lds_size_valid_value : Note<"size must be %select{1, 2, or 4|1, 2, 4, 12 or 16}0">;
def err_amdgcn_processor_is_arg_not_literal
: Error<"the argument to __builtin_amdgcn_processor_is must be a string "
"literal">;
def err_amdgcn_processor_is_arg_invalid_value
: Error<"the argument to __builtin_amdgcn_processor_is must be a valid "
"AMDGCN processor identifier; '%0' is not valid">;
def note_amdgcn_processor_is_valid_options
: Note<"valid AMDGCN processor identifiers are: %0">;
def err_amdgcn_is_invocable_arg_invalid_value
: Error<"the argument to __builtin_amdgcn_is_invocable must be either a "
"target agnostic builtin or an AMDGCN target specific builtin; '%0'"
" is not valid">;
def err_amdgcn_predicate_type_is_not_constructible
: Error<"%0 has type __amdgpu_feature_predicate_t, which is not"
" constructible">;
def err_amdgcn_predicate_type_needs_explicit_bool_cast
: Error<"%0 must be explicitly cast to %1; however, please note that this "
"is almost always an error and that it prevents the effective "
"guarding of target dependent code, and thus should be avoided">;
def note_amdgcn_protected_by_predicate : Note<"jump enters statement controlled"
" by AMDGPU feature predicate">;
def err_amdgcn_conflicting_is_processor_options
: Error<"conflicting check for AMDGCN processor %0 found in a scope already"
" controlled by a check for AMDGCN processor">;
def note_amdgcn_previous_is_processor_guard
: Note<"predicate guard, with establishes the context, inserted here">;
def warn_amdgcn_unguarded_asm_stmt
: Warning<"the '%0' ASM sequence might be invalid for some AMDGPU targets">,
InGroup<UnguardedBuiltinUsageAMDGPU>, DefaultIgnore;
def note_amdgcn_unguarded_asm_silence
: Note<"enclose the '%0' ASM sequence in a scope controlled by a "
"__builtin_amdgcn_is_processor check to silence this warning">;
def err_amdgcn_incompatible_builtin
: Error<"%0 cannot be invoked in the current context, as it requires the "
"'%1' feature(s)%select{|, which '%3' does not provide}2">;
def warn_amdgcn_unguarded_builtin :
Warning<"%0 might be unavailable on some AMDGPU targets">,
InGroup<UnguardedBuiltinUsageAMDGPU>, DefaultIgnore;
def note_amdgcn_unguarded_builtin_silence
: Note<"enclose %0 in a __builtin_amdgcn_is_invocable check to silence "
"this warning">;
def err_amdgcn_coop_atomic_invalid_as : Error<"cooperative atomic requires a global or generic pointer">;

View File

@ -15,12 +15,17 @@
#include "clang/AST/ASTFwd.h"
#include "clang/Sema/SemaBase.h"
#include "llvm/ADT/SmallPtrSet.h"
namespace clang {
class AttributeCommonInfo;
class Expr;
class ParsedAttr;
class SemaAMDGPU : public SemaBase {
llvm::SmallPtrSet<Expr *, 32> ExpandedPredicates;
llvm::SmallPtrSet<FunctionDecl *, 32> PotentiallyUnguardedBuiltinUsers;
public:
SemaAMDGPU(Sema &S);
@ -73,6 +78,16 @@ public:
void handleAMDGPUNumVGPRAttr(Decl *D, const ParsedAttr &AL);
void handleAMDGPUMaxNumWorkGroupsAttr(Decl *D, const ParsedAttr &AL);
void handleAMDGPUFlatWorkGroupSizeAttr(Decl *D, const ParsedAttr &AL);
/// Expand a valid use of the feature identification builtins into its
/// corresponding sequence of instructions.
Expr *ExpandAMDGPUPredicateBuiltIn(Expr *CE);
bool IsPredicate(Expr *E) const;
/// Diagnose unguarded usages of AMDGPU builtins and recommend guarding with
/// __builtin_amdgcn_is_invocable
void AddPotentiallyUnguardedBuiltinUser(FunctionDecl *FD);
bool HasPotentiallyUnguardedBuiltinUsage(FunctionDecl *FD) const;
void DiagnoseUnguardedBuiltinUsage(FunctionDecl *FD);
};
} // namespace clang

View File

@ -1166,7 +1166,7 @@ enum PredefinedTypeIDs {
///
/// Type IDs for non-predefined types will start at
/// NUM_PREDEF_TYPE_IDs.
const unsigned NUM_PREDEF_TYPE_IDS = 514;
const unsigned NUM_PREDEF_TYPE_IDS = 515;
// Ensure we do not overrun the predefined types we reserved
// in the enum PredefinedTypeIDs above.

View File

@ -1426,7 +1426,12 @@ void ASTContext::InitBuiltinTypes(const TargetInfo &Target,
}
if (Target.getTriple().isAMDGPU() ||
(AuxTarget && AuxTarget->getTriple().isAMDGPU())) {
(Target.getTriple().isSPIRV() &&
Target.getTriple().getVendor() == llvm::Triple::AMD) ||
(AuxTarget &&
(AuxTarget->getTriple().isAMDGPU() ||
((AuxTarget->getTriple().isSPIRV() &&
AuxTarget->getTriple().getVendor() == llvm::Triple::AMD))))) {
#define AMDGPU_TYPE(Name, Id, SingletonId, Width, Align) \
InitBuiltinType(SingletonId, BuiltinType::Id);
#include "clang/Basic/AMDGPUTypes.def"
@ -12739,6 +12744,10 @@ static QualType DecodeTypeFromStr(const char *&Str, const ASTContext &Context,
Type = Context.AMDGPUBufferRsrcTy;
break;
}
case 'c': {
Type = Context.AMDGPUFeaturePredicateTy;
break;
}
case 't': {
Type = Context.AMDGPUTextureTy;
break;

View File

@ -2585,6 +2585,8 @@ bool Type::isSizelessBuiltinType() const {
// HLSL intangible types
#define HLSL_INTANGIBLE_TYPE(Name, Id, SingletonId) case BuiltinType::Id:
#include "clang/Basic/HLSLIntangibleTypes.def"
// AMDGPU feature predicate type
case BuiltinType::AMDGPUFeaturePredicate:
return true;
default:
return false;

View File

@ -181,3 +181,12 @@ void SPIRV64AMDGCNTargetInfo::setAuxTarget(const TargetInfo *Aux) {
Float128Format = DoubleFormat;
}
}
bool SPIRV64AMDGCNTargetInfo::isValidCPUName(StringRef CPU) const {
return AMDGPUTI.isValidCPUName(CPU);
}
void SPIRV64AMDGCNTargetInfo::fillValidCPUList(
SmallVectorImpl<StringRef> &Values) const {
return AMDGPUTI.fillValidCPUList(Values);
}

View File

@ -480,6 +480,11 @@ public:
}
bool hasInt128Type() const override { return TargetInfo::hasInt128Type(); }
// This is only needed for validating arguments passed to
// __builtin_amdgcn_processor_is
bool isValidCPUName(StringRef Name) const override;
void fillValidCPUList(SmallVectorImpl<StringRef> &Values) const override;
};
class LLVM_LIBRARY_VISIBILITY SPIRV64IntelTargetInfo final

View File

@ -1105,6 +1105,13 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) {
DBuilder.createBasicType(Name, Width, llvm::dwarf::DW_ATE_unsigned); \
return SingletonId; \
}
#define AMDGPU_FEATURE_PREDICATE_TYPE(Name, Id, SingletonId, Width, Align) \
case BuiltinType::Id: { \
if (!SingletonId) \
SingletonId = \
DBuilder.createBasicType(Name, Width, llvm::dwarf::DW_ATE_boolean); \
return SingletonId; \
}
#include "clang/Basic/AMDGPUTypes.def"
case BuiltinType::UChar:
case BuiltinType::Char_U:

View File

@ -1040,6 +1040,10 @@ Value *ScalarExprEmitter::EmitConversionToBool(Value *Src, QualType SrcType) {
if (const MemberPointerType *MPT = dyn_cast<MemberPointerType>(SrcType))
return CGF.CGM.getCXXABI().EmitMemberPointerIsNotNull(CGF, Src, MPT);
// The conversion is a NOP, and will be done when CodeGening the builtin.
if (SrcType == CGF.getContext().AMDGPUFeaturePredicateTy)
return Src;
assert((SrcType->isIntegerType() || isa<llvm::PointerType>(Src->getType())) &&
"Unknown scalar type to convert");

View File

@ -610,6 +610,9 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) {
case BuiltinType::Id: \
return llvm::TargetExtType::get(getLLVMContext(), "amdgcn.named.barrier", \
{}, {Scope});
#define AMDGPU_FEATURE_PREDICATE_TYPE(Name, Id, SingletonId, Width, Align) \
case BuiltinType::Id: \
return ConvertType(getContext().getLogicalOperationType());
#include "clang/Basic/AMDGPUTypes.def"
#define HLSL_INTANGIBLE_TYPE(Name, Id, SingletonId) case BuiltinType::Id:
#include "clang/Basic/HLSLIntangibleTypes.def"

View File

@ -20,6 +20,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/IR/IntrinsicsSPIRV.h"
#include "llvm/IR/MemoryModelRelaxationAnnotations.h"
#include "llvm/Support/AMDGPUAddrSpace.h"
#include "llvm/Support/AtomicOrdering.h"
@ -458,6 +459,19 @@ void CodeGenFunction::AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst,
Inst->setMetadata(LLVMContext::MD_mmra, MMRAMetadata::getMD(Ctx, MMRAs));
}
static Value *GetAMDGPUPredicate(CodeGenFunction &CGF, Twine Name) {
Constant *SpecId = ConstantInt::getAllOnesValue(CGF.Int32Ty);
LLVMContext &Ctx = CGF.getLLVMContext();
MDNode *Predicate = MDNode::get(Ctx, MDString::get(Ctx, Name.str()));
std::vector<Value *> Args = {SpecId, ConstantInt::getFalse(Ctx),
MetadataAsValue::get(Ctx, Predicate)};
CallInst *Call = CGF.Builder.CreateIntrinsic(
Intrinsic::spv_named_boolean_spec_constant, Args);
return Call;
}
static Intrinsic::ID getIntrinsicIDforWaveReduction(unsigned BuiltinID) {
switch (BuiltinID) {
default:
@ -1014,6 +1028,23 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
llvm::Value *Env = EmitScalarExpr(E->getArg(0));
return Builder.CreateCall(F, {Env});
}
case AMDGPU::BI__builtin_amdgcn_processor_is: {
assert(CGM.getTriple().isSPIRV() &&
"__builtin_amdgcn_processor_is should never reach CodeGen for "
"concrete targets!");
StringRef Proc = cast<clang::StringLiteral>(E->getArg(0))->getString();
return GetAMDGPUPredicate(*this, "is." + Proc);
}
case AMDGPU::BI__builtin_amdgcn_is_invocable: {
assert(CGM.getTriple().isSPIRV() &&
"__builtin_amdgcn_is_invocable should never reach CodeGen for "
"concrete targets!");
auto *FD = cast<FunctionDecl>(
cast<DeclRefExpr>(E->getArg(0))->getReferencedDeclOfCallee());
StringRef RF =
getContext().BuiltinInfo.getRequiredFeatures(FD->getBuiltinID());
return GetAMDGPUPredicate(*this, "has." + RF);
}
case AMDGPU::BI__builtin_amdgcn_read_exec:
return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, false);
case AMDGPU::BI__builtin_amdgcn_read_exec_lo:

View File

@ -19,6 +19,7 @@
#include "clang/AST/StmtOpenACC.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Sema/SemaAMDGPU.h"
#include "clang/Sema/SemaInternal.h"
#include "llvm/ADT/BitVector.h"
using namespace clang;
@ -369,8 +370,10 @@ void JumpScopeChecker::BuildScopeInformation(Stmt *S,
case Stmt::IfStmtClass: {
IfStmt *IS = cast<IfStmt>(S);
bool AMDGPUPredicate = false;
if (!(IS->isConstexpr() || IS->isConsteval() ||
IS->isObjCAvailabilityCheck()))
IS->isObjCAvailabilityCheck() ||
(AMDGPUPredicate = this->S.AMDGPU().IsPredicate(IS->getCond()))))
break;
unsigned Diag = diag::note_protected_by_if_available;
@ -378,6 +381,8 @@ void JumpScopeChecker::BuildScopeInformation(Stmt *S,
Diag = diag::note_protected_by_constexpr_if;
else if (IS->isConsteval())
Diag = diag::note_protected_by_consteval_if;
else if (AMDGPUPredicate)
Diag = diag::note_amdgcn_protected_by_predicate;
if (VarDecl *Var = IS->getConditionVariable())
BuildScopeInformation(Var, ParentScope);

View File

@ -568,8 +568,13 @@ void Sema::Initialize() {
}
if (Context.getTargetInfo().getTriple().isAMDGPU() ||
(Context.getTargetInfo().getTriple().isSPIRV() &&
Context.getTargetInfo().getTriple().getVendor() == llvm::Triple::AMD) ||
(Context.getAuxTargetInfo() &&
Context.getAuxTargetInfo()->getTriple().isAMDGPU())) {
(Context.getAuxTargetInfo()->getTriple().isAMDGPU() ||
(Context.getAuxTargetInfo()->getTriple().isSPIRV() &&
Context.getAuxTargetInfo()->getTriple().getVendor() ==
llvm::Triple::AMD)))) {
#define AMDGPU_TYPE(Name, Id, SingletonId, Width, Align) \
addImplicitTypedef(Name, Context.SingletonId);
#include "clang/Basic/AMDGPUTypes.def"

View File

@ -11,14 +11,24 @@
//===----------------------------------------------------------------------===//
#include "clang/Sema/SemaAMDGPU.h"
#include "clang/AST/Decl.h"
#include "clang/AST/DynamicRecursiveASTVisitor.h"
#include "clang/AST/Expr.h"
#include "clang/Basic/DiagnosticFrontend.h"
#include "clang/Basic/DiagnosticSema.h"
#include "clang/Basic/TargetBuiltins.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Sema/Ownership.h"
#include "clang/Sema/Scope.h"
#include "clang/Sema/Sema.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/AMDGPUAddrSpace.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/TargetParser/TargetParser.h"
#include <cstdint>
#include <utility>
namespace clang {
@ -742,4 +752,298 @@ void SemaAMDGPU::handleAMDGPUMaxNumWorkGroupsAttr(Decl *D,
addAMDGPUMaxNumWorkGroupsAttr(D, AL, AL.getArgAsExpr(0), YExpr, ZExpr);
}
Expr *SemaAMDGPU::ExpandAMDGPUPredicateBuiltIn(Expr *E) {
CallExpr *CE = cast<CallExpr>(E->IgnoreParens());
ASTContext &Ctx = getASTContext();
QualType BoolTy = Ctx.getLogicalOperationType();
llvm::APInt False = llvm::APInt::getZero(Ctx.getIntWidth(BoolTy));
llvm::APInt True = llvm::APInt::getAllOnes(Ctx.getIntWidth(BoolTy));
SourceLocation Loc = CE->getExprLoc();
if (!CE->getBuiltinCallee())
return *ExpandedPredicates
.insert(IntegerLiteral::Create(Ctx, False, BoolTy, Loc))
.first;
bool P = false;
unsigned BI = CE->getBuiltinCallee();
if (Ctx.BuiltinInfo.isAuxBuiltinID(BI))
BI = Ctx.BuiltinInfo.getAuxBuiltinID(BI);
if (BI == AMDGPU::BI__builtin_amdgcn_processor_is) {
auto *GFX = dyn_cast<StringLiteral>(CE->getArg(0)->IgnoreParenCasts());
if (!GFX) {
Diag(Loc, diag::err_amdgcn_processor_is_arg_not_literal);
return nullptr;
}
StringRef N = GFX->getString();
const TargetInfo &TI = Ctx.getTargetInfo();
const TargetInfo *AuxTI = Ctx.getAuxTargetInfo();
if (!TI.isValidCPUName(N) && (!AuxTI || !AuxTI->isValidCPUName(N))) {
Diag(Loc, diag::err_amdgcn_processor_is_arg_invalid_value) << N;
SmallVector<StringRef, 32> ValidList;
if (TI.getTriple().getVendor() == llvm::Triple::VendorType::AMD)
TI.fillValidCPUList(ValidList);
else if (AuxTI) // Since the BI is present it must be an AMDGPU triple.
AuxTI->fillValidCPUList(ValidList);
if (!ValidList.empty())
Diag(Loc, diag::note_amdgcn_processor_is_valid_options)
<< llvm::join(ValidList, ", ");
return nullptr;
}
if (Ctx.getTargetInfo().getTriple().isSPIRV()) {
CE->setType(BoolTy);
return *ExpandedPredicates.insert(CE).first;
}
if (auto TID = Ctx.getTargetInfo().getTargetID())
P = TID->find(N) == 0;
} else {
Expr *Arg = CE->getArg(0);
if (!Arg || Arg->getType() != Ctx.BuiltinFnTy) {
Diag(Loc, diag::err_amdgcn_is_invocable_arg_invalid_value) << Arg;
return nullptr;
}
if (Ctx.getTargetInfo().getTriple().isSPIRV()) {
CE->setType(BoolTy);
return *ExpandedPredicates.insert(CE).first;
}
auto *FD = cast<FunctionDecl>(Arg->getReferencedDeclOfCallee());
StringRef RF = Ctx.BuiltinInfo.getRequiredFeatures(FD->getBuiltinID());
llvm::StringMap<bool> CF;
Ctx.getFunctionFeatureMap(CF, FD);
P = Builtin::evaluateRequiredTargetFeatures(RF, CF);
}
return *ExpandedPredicates
.insert(
IntegerLiteral::Create(Ctx, P ? True : False, BoolTy, Loc))
.first;
}
bool SemaAMDGPU::IsPredicate(Expr *E) const {
return ExpandedPredicates.contains(E);
}
void SemaAMDGPU::AddPotentiallyUnguardedBuiltinUser(FunctionDecl *FD) {
PotentiallyUnguardedBuiltinUsers.insert(FD);
}
bool SemaAMDGPU::HasPotentiallyUnguardedBuiltinUsage(FunctionDecl *FD) const {
return PotentiallyUnguardedBuiltinUsers.contains(FD);
}
namespace {
/// This class implements -Wamdgpu-unguarded-builtin-usage.
///
/// This is done with a traversal of the AST of a function that includes a
/// call to a target specific builtin. Whenever we encounter an \c if of the
/// form: \c if(__builtin_amdgcn_is_invocable), we consider the then statement
/// guarded.
class DiagnoseUnguardedBuiltins : public DynamicRecursiveASTVisitor {
// TODO: this could eventually be extended to consider what happens when there
// are multiple target architectures specified via target("arch=gfxXXX")
// target("arch=gfxyyy") etc., as well as feature disabling via "-XXX".
Sema &SemaRef;
SmallVector<StringRef> TargetFeatures;
SmallVector<std::pair<SourceLocation, StringRef>> CurrentGFXIP;
SmallVector<unsigned> GuardedBuiltins;
static Expr *FindPredicate(Expr *Cond) {
if (auto *CE = dyn_cast<CallExpr>(Cond)) {
if (CE->getBuiltinCallee() == AMDGPU::BI__builtin_amdgcn_is_invocable ||
CE->getBuiltinCallee() == AMDGPU::BI__builtin_amdgcn_processor_is)
return Cond;
} else if (auto *UO = dyn_cast<UnaryOperator>(Cond)) {
return FindPredicate(UO->getSubExpr());
} else if (auto *BO = dyn_cast<BinaryOperator>(Cond)) {
if ((Cond = FindPredicate(BO->getLHS())))
return Cond;
return FindPredicate(BO->getRHS());
}
return nullptr;
}
bool EnterPredicateGuardedContext(CallExpr *P);
void ExitPredicateGuardedContext(bool WasProcessorCheck);
bool TraverseGuardedStmt(Stmt *S, CallExpr *P);
public:
DiagnoseUnguardedBuiltins(Sema &SemaRef) : SemaRef(SemaRef) {
if (auto *TAT = SemaRef.getCurFunctionDecl(true)->getAttr<TargetAttr>()) {
// We use the somewhat misnamed x86 accessors because they provide exactly
// what we require.
TAT->getX86AddedFeatures(TargetFeatures);
if (auto GFXIP = TAT->getX86Architecture())
CurrentGFXIP.emplace_back(TAT->getLocation(), *GFXIP);
}
}
bool TraverseLambdaExpr(LambdaExpr *LE) override {
if (SemaRef.AMDGPU().HasPotentiallyUnguardedBuiltinUsage(
LE->getCallOperator()))
return true; // We have already handled this.
return DynamicRecursiveASTVisitor::TraverseLambdaExpr(LE);
}
bool TraverseStmt(Stmt *S) override {
if (!S)
return true;
return DynamicRecursiveASTVisitor::TraverseStmt(S);
}
void IssueDiagnostics(Stmt *S) { TraverseStmt(S); }
bool TraverseIfStmt(IfStmt *If) override {
if (auto *CE = dyn_cast_or_null<CallExpr>(FindPredicate(If->getCond())))
return TraverseGuardedStmt(If, CE);
return DynamicRecursiveASTVisitor::TraverseIfStmt(If);
}
bool TraverseCaseStmt(CaseStmt *CS) override {
return TraverseStmt(CS->getSubStmt());
}
bool TraverseConditionalOperator(ConditionalOperator *CO) override {
if (auto *CE = dyn_cast_or_null<CallExpr>(FindPredicate(CO->getCond())))
return TraverseGuardedStmt(CO, CE);
return DynamicRecursiveASTVisitor::TraverseConditionalOperator(CO);
}
bool VisitAsmStmt(AsmStmt *ASM) override;
bool VisitCallExpr(CallExpr *CE) override;
};
bool DiagnoseUnguardedBuiltins::EnterPredicateGuardedContext(CallExpr *P) {
bool IsProcessorCheck =
P->getBuiltinCallee() == AMDGPU::BI__builtin_amdgcn_processor_is;
if (IsProcessorCheck) {
StringRef G = cast<clang::StringLiteral>(P->getArg(0))->getString();
// TODO: handle generic ISAs.
if (!CurrentGFXIP.empty() && G != CurrentGFXIP.back().second) {
SemaRef.Diag(P->getExprLoc(),
diag::err_amdgcn_conflicting_is_processor_options)
<< P;
SemaRef.Diag(CurrentGFXIP.back().first,
diag::note_amdgcn_previous_is_processor_guard);
}
CurrentGFXIP.emplace_back(P->getExprLoc(), G);
} else {
auto *FD = cast<FunctionDecl>(
cast<DeclRefExpr>(P->getArg(0))->getReferencedDeclOfCallee());
GuardedBuiltins.push_back(FD->getBuiltinID());
}
return IsProcessorCheck;
}
void DiagnoseUnguardedBuiltins::ExitPredicateGuardedContext(bool WasProcCheck) {
if (WasProcCheck)
CurrentGFXIP.pop_back();
else
GuardedBuiltins.pop_back();
}
inline std::pair<Stmt *, Stmt *> GetTraversalOrder(Stmt *S) {
std::pair<Stmt *, Stmt *> Ordered;
Expr *Condition = nullptr;
if (auto *CO = dyn_cast<ConditionalOperator>(S)) {
Condition = CO->getCond();
Ordered = {CO->getTrueExpr(), CO->getFalseExpr()};
} else if (auto *If = dyn_cast<IfStmt>(S)) {
Condition = If->getCond();
Ordered = {If->getThen(), If->getElse()};
}
if (auto *UO = dyn_cast<UnaryOperator>(Condition))
if (UO->getOpcode() == UnaryOperatorKind::UO_LNot)
std::swap(Ordered.first, Ordered.second);
return Ordered;
}
bool DiagnoseUnguardedBuiltins::TraverseGuardedStmt(Stmt *S, CallExpr *P) {
assert(S && "Unexpected missing Statement!");
assert(P && "Unexpected missing Predicate!");
auto [Guarded, Unguarded] = GetTraversalOrder(S);
bool WasProcessorCheck = EnterPredicateGuardedContext(P);
bool Continue = TraverseStmt(Guarded);
ExitPredicateGuardedContext(WasProcessorCheck);
return Continue && TraverseStmt(Unguarded);
}
bool DiagnoseUnguardedBuiltins::VisitAsmStmt(AsmStmt *ASM) {
// TODO: should we check if the ASM is valid for the target? Can we?
if (!CurrentGFXIP.empty())
return true;
std::string S = ASM->generateAsmString(SemaRef.getASTContext());
SemaRef.Diag(ASM->getAsmLoc(), diag::warn_amdgcn_unguarded_asm_stmt) << S;
SemaRef.Diag(ASM->getAsmLoc(), diag::note_amdgcn_unguarded_asm_silence) << S;
return true;
}
bool DiagnoseUnguardedBuiltins::VisitCallExpr(CallExpr *CE) {
unsigned ID = CE->getBuiltinCallee();
Builtin::Context &BInfo = SemaRef.getASTContext().BuiltinInfo;
if (!ID)
return true;
if (!BInfo.isTSBuiltin(ID))
return true;
if (ID == AMDGPU::BI__builtin_amdgcn_processor_is ||
ID == AMDGPU::BI__builtin_amdgcn_is_invocable)
return true;
if (llvm::find(GuardedBuiltins, ID) != GuardedBuiltins.end())
return true;
StringRef FL(BInfo.getRequiredFeatures(ID));
llvm::StringMap<bool> FeatureMap;
if (CurrentGFXIP.empty()) {
for (auto &&F : TargetFeatures)
FeatureMap[F] = true;
for (auto &&GID : GuardedBuiltins)
for (auto &&F : llvm::split(BInfo.getRequiredFeatures(GID), ','))
FeatureMap[F] = true;
} else {
static const llvm::Triple AMDGCN("amdgcn-amd-amdhsa");
llvm::AMDGPU::fillAMDGPUFeatureMap(CurrentGFXIP.back().second, AMDGCN,
FeatureMap);
}
FunctionDecl *BI = CE->getDirectCallee();
SourceLocation BICallLoc = CE->getExprLoc();
if (Builtin::evaluateRequiredTargetFeatures(FL, FeatureMap)) {
SemaRef.Diag(BICallLoc, diag::warn_amdgcn_unguarded_builtin) << BI;
SemaRef.Diag(BICallLoc, diag::note_amdgcn_unguarded_builtin_silence) << BI;
} else {
StringRef GFXIP = CurrentGFXIP.empty() ? "" : CurrentGFXIP.back().second;
SemaRef.Diag(BICallLoc, diag::err_amdgcn_incompatible_builtin)
<< BI << FL << !CurrentGFXIP.empty() << GFXIP;
if (!CurrentGFXIP.empty())
SemaRef.Diag(CurrentGFXIP.back().first,
diag::note_amdgcn_previous_is_processor_guard);
}
return true;
}
} // Unnamed namespace
void SemaAMDGPU::DiagnoseUnguardedBuiltinUsage(FunctionDecl *FD) {
DiagnoseUnguardedBuiltins(SemaRef).IssueDiagnostics(FD->getBody());
}
} // namespace clang

View File

@ -23,6 +23,7 @@
#include "clang/Basic/TargetInfo.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Sema/Initialization.h"
#include "clang/Sema/SemaAMDGPU.h"
#include "clang/Sema/SemaHLSL.h"
#include "clang/Sema/SemaObjC.h"
#include "clang/Sema/SemaRISCV.h"
@ -1592,6 +1593,13 @@ static TryCastResult TryStaticCast(Sema &Self, ExprResult &SrcExpr,
return TC_Success;
}
if (SrcType == Self.Context.AMDGPUFeaturePredicateTy &&
DestType == Self.Context.getLogicalOperationType()) {
SrcExpr = Self.AMDGPU().ExpandAMDGPUPredicateBuiltIn(SrcExpr.get());
Kind = CK_NoOp;
return TC_Success;
}
// We tried everything. Everything! Nothing works! :-(
return TC_NotApplicable;
}

View File

@ -47,6 +47,7 @@
#include "clang/Sema/ParsedTemplate.h"
#include "clang/Sema/Scope.h"
#include "clang/Sema/ScopeInfo.h"
#include "clang/Sema/SemaAMDGPU.h"
#include "clang/Sema/SemaARM.h"
#include "clang/Sema/SemaCUDA.h"
#include "clang/Sema/SemaHLSL.h"
@ -13940,6 +13941,16 @@ void Sema::AddInitializerToDecl(Decl *RealDecl, Expr *Init, bool DirectInit) {
return;
}
// __amdgpu_feature_predicate_t cannot be initialised
if (VDecl->getType().getDesugaredType(Context) ==
Context.AMDGPUFeaturePredicateTy) {
Diag(VDecl->getLocation(),
diag::err_amdgcn_predicate_type_is_not_constructible)
<< VDecl;
VDecl->setInvalidDecl();
return;
}
// WebAssembly tables can't be used to initialise a variable.
if (!Init->getType().isNull() && Init->getType()->isWebAssemblyTableType()) {
Diag(Init->getExprLoc(), diag::err_wasm_table_art) << 0;
@ -14462,6 +14473,13 @@ void Sema::ActOnUninitializedDecl(Decl *RealDecl) {
if (VarDecl *Var = dyn_cast<VarDecl>(RealDecl)) {
QualType Type = Var->getType();
if (Type.getDesugaredType(Context) == Context.AMDGPUFeaturePredicateTy) {
Diag(Var->getLocation(),
diag::err_amdgcn_predicate_type_is_not_constructible)
<< Var;
Var->setInvalidDecl();
return;
}
// C++1z [dcl.dcl]p1 grammar implies that an initializer is mandatory.
if (isa<DecompositionDecl>(RealDecl)) {
Diag(Var->getLocation(), diag::err_decomp_decl_requires_init) << Var;
@ -17021,8 +17039,12 @@ Decl *Sema::ActOnFinishFunctionBody(Decl *dcl, Stmt *Body, bool IsInstantiation,
return nullptr;
}
if (Body && FSI->HasPotentialAvailabilityViolations)
DiagnoseUnguardedAvailabilityViolations(dcl);
if (Body) {
if (FSI->HasPotentialAvailabilityViolations)
DiagnoseUnguardedAvailabilityViolations(dcl);
else if (AMDGPU().HasPotentiallyUnguardedBuiltinUsage(FD))
AMDGPU().DiagnoseUnguardedBuiltinUsage(FD);
}
assert(!FSI->ObjCShouldCallSuper &&
"This should only be set for ObjC methods, which should have been "

View File

@ -53,6 +53,7 @@
#include "clang/Sema/ParsedTemplate.h"
#include "clang/Sema/Scope.h"
#include "clang/Sema/ScopeInfo.h"
#include "clang/Sema/SemaAMDGPU.h"
#include "clang/Sema/SemaARM.h"
#include "clang/Sema/SemaCUDA.h"
#include "clang/Sema/SemaFixItUtils.h"
@ -6779,6 +6780,22 @@ ExprResult Sema::BuildCallExpr(Scope *Scope, Expr *Fn, SourceLocation LParenLoc,
if (Result.isInvalid()) return ExprError();
Fn = Result.get();
// The __builtin_amdgcn_is_invocable builtin is special, and will be resolved
// later, when we check boolean conditions, for now we merely forward it
// without any additional checking.
if (Fn->getType() == Context.BuiltinFnTy && ArgExprs.size() == 1 &&
ArgExprs[0]->getType() == Context.BuiltinFnTy) {
const auto *FD = cast<FunctionDecl>(Fn->getReferencedDeclOfCallee());
if (FD->getName() == "__builtin_amdgcn_is_invocable") {
QualType FnPtrTy = Context.getPointerType(FD->getType());
Expr *R = ImpCastExprToType(Fn, FnPtrTy, CK_BuiltinFnToFnPtr).get();
return CallExpr::Create(
Context, R, ArgExprs, Context.AMDGPUFeaturePredicateTy,
ExprValueKind::VK_PRValue, RParenLoc, FPOptionsOverride());
}
}
if (CheckArgsForPlaceholders(ArgExprs))
return ExprError();
@ -6897,6 +6914,15 @@ ExprResult Sema::BuildCallExpr(Scope *Scope, Expr *Fn, SourceLocation LParenLoc,
FunctionDecl *FDecl = dyn_cast<FunctionDecl>(NDecl);
if (FDecl && FDecl->getBuiltinID()) {
const llvm::Triple &Triple = Context.getTargetInfo().getTriple();
if (Triple.isSPIRV() && Triple.getVendor() == llvm::Triple::AMD) {
if (Context.BuiltinInfo.isTSBuiltin(FDecl->getBuiltinID()) &&
!Context.BuiltinInfo.isAuxBuiltinID(FDecl->getBuiltinID())) {
AMDGPU().AddPotentiallyUnguardedBuiltinUser(cast<FunctionDecl>(
getFunctionLevelDeclContext(/*AllowLambda=*/true)));
}
}
// Rewrite the function decl for this builtin by replacing parameters
// with no explicit address space with the address space of the arguments
// in ArgExprs.
@ -13905,6 +13931,11 @@ inline QualType Sema::CheckLogicalOperands(ExprResult &LHS, ExprResult &RHS,
if (RHS.isInvalid())
return QualType();
if (LHS.get()->getType() == Context.AMDGPUFeaturePredicateTy)
LHS = AMDGPU().ExpandAMDGPUPredicateBuiltIn(LHS.get());
if (RHS.get()->getType() == Context.AMDGPUFeaturePredicateTy)
RHS = AMDGPU().ExpandAMDGPUPredicateBuiltIn(RHS.get());
if (!LHS.get()->getType()->isScalarType() ||
!RHS.get()->getType()->isScalarType())
return InvalidOperands(Loc, LHS, RHS);
@ -16315,6 +16346,10 @@ ExprResult Sema::CreateBuiltinUnaryOp(SourceLocation OpLoc,
// Vector logical not returns the signed variant of the operand type.
resultType = GetSignedVectorType(resultType);
break;
} else if (resultType == Context.AMDGPUFeaturePredicateTy) {
resultType = Context.getLogicalOperationType();
Input = AMDGPU().ExpandAMDGPUPredicateBuiltIn(InputExpr);
break;
} else {
return ExprError(Diag(OpLoc, diag::err_typecheck_unary_expr)
<< resultType << Input.get()->getSourceRange());
@ -21176,6 +21211,9 @@ ExprResult Sema::CheckBooleanCondition(SourceLocation Loc, Expr *E,
E = result.get();
if (!E->isTypeDependent()) {
if (E->getType() == Context.AMDGPUFeaturePredicateTy)
return AMDGPU().ExpandAMDGPUPredicateBuiltIn(E);
if (getLangOpts().CPlusPlus)
return CheckCXXBooleanCondition(E, IsConstexpr); // C++ 6.4p4

View File

@ -9244,6 +9244,15 @@ bool InitializationSequence::Diagnose(Sema &S,
case FK_ConversionFailed: {
QualType FromType = OnlyArg->getType();
// __amdgpu_feature_predicate_t can be explicitly cast to the logical op
// type, although this is almost always an error and we advise against it.
if (FromType == S.Context.AMDGPUFeaturePredicateTy &&
DestType == S.Context.getLogicalOperationType()) {
S.Diag(OnlyArg->getExprLoc(),
diag::err_amdgcn_predicate_type_needs_explicit_bool_cast)
<< OnlyArg << DestType;
break;
}
PartialDiagnostic PDiag = S.PDiag(diag::err_init_conversion_failed)
<< (int)Entity.getKind()
<< DestType
@ -10056,6 +10065,14 @@ Sema::PerformCopyInitialization(const InitializedEntity &Entity,
if (EqualLoc.isInvalid())
EqualLoc = InitE->getBeginLoc();
if (Entity.getType().getDesugaredType(Context) ==
Context.AMDGPUFeaturePredicateTy &&
Entity.getDecl()) {
Diag(EqualLoc, diag::err_amdgcn_predicate_type_is_not_constructible)
<< Entity.getDecl();
return ExprError();
}
InitializationKind Kind = InitializationKind::CreateCopy(
InitE->getBeginLoc(), EqualLoc, AllowExplicit);
InitializationSequence Seq(*this, Entity, Kind, InitE, TopLevelOfInitList);

View File

@ -30,6 +30,7 @@
#include "clang/Sema/Initialization.h"
#include "clang/Sema/Lookup.h"
#include "clang/Sema/Overload.h"
#include "clang/Sema/SemaAMDGPU.h"
#include "clang/Sema/SemaARM.h"
#include "clang/Sema/SemaCUDA.h"
#include "clang/Sema/SemaObjC.h"
@ -6365,12 +6366,13 @@ TryContextuallyConvertToBool(Sema &S, Expr *From) {
ExprResult Sema::PerformContextuallyConvertToBool(Expr *From) {
if (checkPlaceholderForOverload(*this, From))
return ExprError();
if (From->getType() == Context.AMDGPUFeaturePredicateTy)
return AMDGPU().ExpandAMDGPUPredicateBuiltIn(From);
ImplicitConversionSequence ICS = TryContextuallyConvertToBool(*this, From);
if (!ICS.isBad())
return PerformImplicitConversion(From, Context.BoolTy, ICS,
AssignmentAction::Converting);
if (!DiagnoseMultipleUserDefinedConversion(From, Context.BoolTy))
return Diag(From->getBeginLoc(), diag::err_typecheck_bool_condition)
<< From->getType() << From->getSourceRange();
@ -12218,6 +12220,16 @@ static void DiagnoseBadConversion(Sema &S, OverloadCandidate *Cand,
if (TakingCandidateAddress && !checkAddressOfCandidateIsAvailable(S, Fn))
return;
// __amdgpu_feature_predicate_t can be explicitly cast to the logical op type,
// although this is almost always an error and we advise against it.
if (FromTy == S.Context.AMDGPUFeaturePredicateTy &&
ToTy == S.Context.getLogicalOperationType()) {
S.Diag(Conv.Bad.FromExpr->getExprLoc(),
diag::err_amdgcn_predicate_type_needs_explicit_bool_cast)
<< Conv.Bad.FromExpr << ToTy;
return;
}
// Emit the generic diagnostic and, optionally, add the hints to it.
PartialDiagnostic FDiag = S.PDiag(diag::note_ovl_candidate_bad_conv);
FDiag << (unsigned)FnKindPair.first << (unsigned)FnKindPair.second << FnDesc

View File

@ -0,0 +1,72 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --version 6
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx900 -emit-llvm %s -o - | FileCheck --check-prefix=AMDGCN-GFX900 %s
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx1010 -emit-llvm %s -o - | FileCheck --check-prefix=AMDGCN-GFX1010 %s
// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm %s -o - | FileCheck --check-prefix=AMDGCNSPIRV %s
// Test that, depending on triple and, if applicable, target-cpu, one of three
// things happens:
// 1) for gfx900 we emit an empty kernel (concrete target, lacks feature)
// 2) for gfx1010 we emit a call to trap (concrete target, has feature)
// 3) for AMDGCNSPIRV we emit a boolean specialisation constant, via a call
// to __spirv_SpecConstant, with the id of UINT32_MAX, and the boolean
// value of false, which will yield an OpSpecConstantFalse in SPIR-V
// AMDGCN-GFX900-LABEL: define dso_local void @foo(
// AMDGCN-GFX900-SAME: ) #[[ATTR0:[0-9]+]] {
// AMDGCN-GFX900-NEXT: [[ENTRY:.*:]]
// AMDGCN-GFX900-NEXT: ret void
//
// AMDGCN-GFX1010-LABEL: define dso_local void @foo(
// AMDGCN-GFX1010-SAME: ) #[[ATTR0:[0-9]+]] {
// AMDGCN-GFX1010-NEXT: [[ENTRY:.*:]]
// AMDGCN-GFX1010-NEXT: call void @llvm.trap()
// AMDGCN-GFX1010-NEXT: ret void
//
// AMDGCNSPIRV-LABEL: define spir_func void @foo(
// AMDGCNSPIRV-SAME: ) addrspace(4) #[[ATTR0:[0-9]+]] {
// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata [[META2:![0-9]+]])
// AMDGCNSPIRV-NEXT: [[TOBOOL:%.*]] = icmp ne i1 [[TMP0]], false
// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL]], label %[[IF_THEN:.*]], label %[[LOR_LHS_FALSE:.*]]
// AMDGCNSPIRV: [[LOR_LHS_FALSE]]:
// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata [[META3:![0-9]+]])
// AMDGCNSPIRV-NEXT: [[TOBOOL1:%.*]] = icmp ne i1 [[TMP1]], false
// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL1]], label %[[IF_THEN]], label %[[LOR_LHS_FALSE2:.*]]
// AMDGCNSPIRV: [[LOR_LHS_FALSE2]]:
// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata [[META4:![0-9]+]])
// AMDGCNSPIRV-NEXT: [[TOBOOL3:%.*]] = icmp ne i1 [[TMP2]], false
// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL3]], label %[[IF_THEN]], label %[[IF_END:.*]]
// AMDGCNSPIRV: [[IF_THEN]]:
// AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.trap()
// AMDGCNSPIRV-NEXT: br label %[[IF_END]]
// AMDGCNSPIRV: [[IF_END]]:
// AMDGCNSPIRV-NEXT: ret void
//
void foo() {
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_permlanex16) ||
(__builtin_amdgcn_is_invocable(__builtin_amdgcn_permlanex16_var)) ||
(__builtin_amdgcn_is_invocable(__builtin_amdgcn_ashr_pk_i8_i32)))
return __builtin_trap();
}
//.
// AMDGCN-GFX900: attributes #[[ATTR0]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" }
//.
// AMDGCN-GFX1010: attributes #[[ATTR0]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1010" }
// AMDGCN-GFX1010: attributes #[[ATTR1:[0-9]+]] = { cold noreturn nounwind memory(inaccessiblemem: write) }
//.
// AMDGCNSPIRV: attributes #[[ATTR0]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-trans-insts,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+gws,+image-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+vmem-to-lds-load-insts,+wavefrontsize32,+wavefrontsize64" }
// AMDGCNSPIRV: attributes #[[ATTR1:[0-9]+]] = { nounwind }
// AMDGCNSPIRV: attributes #[[ATTR2:[0-9]+]] = { cold noreturn nounwind memory(inaccessiblemem: write) }
//.
// AMDGCN-GFX900: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
// AMDGCN-GFX900: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
//.
// AMDGCN-GFX1010: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
// AMDGCN-GFX1010: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
//.
// AMDGCNSPIRV: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
// AMDGCNSPIRV: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
// AMDGCNSPIRV: [[META2]] = !{!"has.gfx10-insts"}
// AMDGCNSPIRV: [[META3]] = !{!"has.gfx12-insts"}
// AMDGCNSPIRV: [[META4]] = !{!"has.ashr-pk-insts"}
//.

View File

@ -0,0 +1,83 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --version 5
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx900 -emit-llvm %s -o - | FileCheck --check-prefix=AMDGCN-GFX900 %s
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx1010 -emit-llvm %s -o - | FileCheck --check-prefix=AMDGCN-GFX1010 %s
// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm %s -o - | FileCheck --check-prefix=AMDGCNSPIRV %s
// Test that, depending on triple and, if applicable, target-cpu, one of three
// things happens:
// 1) for gfx900 we emit a call to trap (concrete target, matches)
// 2) for gfx1010 we emit an empty kernel (concrete target, does not match)
// 3) for AMDGCNSPIRV we emit a boolean specialisation constant, via a call
// to __spirv_SpecConstant, with the id of UINT32_MAX, and the boolean
// value of false, which will yield an OpSpecConstantFalse in SPIR-V
// AMDGCN-GFX900-LABEL: define dso_local void @foo(
// AMDGCN-GFX900-SAME: ) #[[ATTR0:[0-9]+]] {
// AMDGCN-GFX900-NEXT: [[ENTRY:.*:]]
// AMDGCN-GFX900-NEXT: call void @llvm.trap()
// AMDGCN-GFX900-NEXT: ret void
//
// AMDGCN-GFX1010-LABEL: define dso_local void @foo(
// AMDGCN-GFX1010-SAME: ) #[[ATTR0:[0-9]+]] {
// AMDGCN-GFX1010-NEXT: [[ENTRY:.*:]]
// AMDGCN-GFX1010-NEXT: ret void
//
// AMDGCNSPIRV-LABEL: define spir_func void @foo(
// AMDGCNSPIRV-SAME: ) addrspace(4) #[[ATTR0:[0-9]+]] {
// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata [[META2:![0-9]+]])
// AMDGCNSPIRV-NEXT: [[TOBOOL:%.*]] = icmp ne i1 [[TMP0]], false
// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL]], label %[[IF_THEN:.*]], label %[[LOR_LHS_FALSE:.*]]
// AMDGCNSPIRV: [[LOR_LHS_FALSE]]:
// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata [[META3:![0-9]+]])
// AMDGCNSPIRV-NEXT: [[TOBOOL1:%.*]] = icmp ne i1 [[TMP1]], false
// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL1]], label %[[IF_THEN]], label %[[LOR_LHS_FALSE2:.*]]
// AMDGCNSPIRV: [[LOR_LHS_FALSE2]]:
// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata [[META4:![0-9]+]])
// AMDGCNSPIRV-NEXT: [[TOBOOL3:%.*]] = icmp ne i1 [[TMP2]], false
// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL3]], label %[[IF_THEN]], label %[[LOR_LHS_FALSE4:.*]]
// AMDGCNSPIRV: [[LOR_LHS_FALSE4]]:
// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata [[META5:![0-9]+]])
// AMDGCNSPIRV-NEXT: [[TOBOOL5:%.*]] = icmp ne i1 [[TMP3]], false
// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL5]], label %[[IF_THEN]], label %[[LOR_LHS_FALSE6:.*]]
// AMDGCNSPIRV: [[LOR_LHS_FALSE6]]:
// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata [[META6:![0-9]+]])
// AMDGCNSPIRV-NEXT: [[TOBOOL7:%.*]] = icmp ne i1 [[TMP4]], false
// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL7]], label %[[IF_THEN]], label %[[IF_END:.*]]
// AMDGCNSPIRV: [[IF_THEN]]:
// AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.trap()
// AMDGCNSPIRV-NEXT: br label %[[IF_END]]
// AMDGCNSPIRV: [[IF_END]]:
// AMDGCNSPIRV-NEXT: ret void
//
void foo() {
if (__builtin_amdgcn_processor_is("gfx900") ||
__builtin_amdgcn_processor_is("gfx906") ||
__builtin_amdgcn_processor_is("gfx90c") ||
(__builtin_amdgcn_processor_is("gfx90a")) ||
(__builtin_amdgcn_processor_is("gfx942")))
return __builtin_trap();
}
//.
// AMDGCN-GFX900: attributes #[[ATTR0]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" }
// AMDGCN-GFX900: attributes #[[ATTR1:[0-9]+]] = { cold noreturn nounwind memory(inaccessiblemem: write) }
//.
// AMDGCN-GFX1010: attributes #[[ATTR0]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1010" }
//.
// AMDGCNSPIRV: attributes #[[ATTR0]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-trans-insts,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+gws,+image-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+vmem-to-lds-load-insts,+wavefrontsize32,+wavefrontsize64" }
// AMDGCNSPIRV: attributes #[[ATTR1:[0-9]+]] = { nounwind }
// AMDGCNSPIRV: attributes #[[ATTR2:[0-9]+]] = { cold noreturn nounwind memory(inaccessiblemem: write) }
//.
// AMDGCN-GFX900: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
// AMDGCN-GFX900: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
//.
// AMDGCN-GFX1010: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
// AMDGCN-GFX1010: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
//.
// AMDGCNSPIRV: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
// AMDGCNSPIRV: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
// AMDGCNSPIRV: [[META2]] = !{!"is.gfx900"}
// AMDGCNSPIRV: [[META3]] = !{!"is.gfx906"}
// AMDGCNSPIRV: [[META4]] = !{!"is.gfx90c"}
// AMDGCNSPIRV: [[META5]] = !{!"is.gfx90a"}
// AMDGCNSPIRV: [[META6]] = !{!"is.gfx942"}
//.

View File

@ -0,0 +1,48 @@
// RUN: not %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx900 -emit-llvm %s -o - 2>&1 | FileCheck %s
// RUN: not %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm %s -o - 2>&1 | FileCheck %s
bool predicate(bool x);
void pass_by_value(__amdgpu_feature_predicate_t x);
void invalid_uses(int *p, int x, const __amdgpu_feature_predicate_t &lv,
__amdgpu_feature_predicate_t &&rv) {
// CHECK: error: 'a' has type __amdgpu_feature_predicate_t, which is not constructible
__amdgpu_feature_predicate_t a;
// CHECK: error: 'b' has type __amdgpu_feature_predicate_t, which is not constructible
__amdgpu_feature_predicate_t b = __builtin_amdgcn_processor_is("gfx906");
// CHECK: error: 'c' has type __amdgpu_feature_predicate_t, which is not constructible
__amdgpu_feature_predicate_t c = lv;
// CHECK: error: 'd' has type __amdgpu_feature_predicate_t, which is not constructible
__amdgpu_feature_predicate_t d = rv;
// CHECK: error: '__builtin_amdgcn_processor_is("gfx906")' must be explicitly cast to 'bool'; however, please note that this is almost always an error and that it prevents the effective guarding of target dependent code, and thus should be avoided
bool invalid_use_in_init_0 = __builtin_amdgcn_processor_is("gfx906");
// CHECK: error: 'x' has type __amdgpu_feature_predicate_t, which is not constructible
pass_by_value(__builtin_amdgcn_processor_is("gfx906"));
// CHECK: error: '__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_sleep_var)' must be explicitly cast to 'bool'; however, please note that this is almost always an error and that it prevents the effective guarding of target dependent code, and thus should be avoided
bool invalid_use_in_init_1 = __builtin_amdgcn_is_invocable(__builtin_amdgcn_s_sleep_var);
// CHECK: error: '__builtin_amdgcn_processor_is("gfx906")' must be explicitly cast to 'bool'; however, please note that this is almost always an error and that it prevents the effective guarding of target dependent code, and thus should be avoided
if (bool invalid_use_in_init_2 = __builtin_amdgcn_processor_is("gfx906")) return;
// CHECK: error: '__builtin_amdgcn_processor_is("gfx1200")' must be explicitly cast to 'bool'; however, please note that this is almost always an error and that it prevents the effective guarding of target dependent code, and thus should be avoided
if (predicate(__builtin_amdgcn_processor_is("gfx1200"))) __builtin_amdgcn_s_sleep_var(x);
}
void invalid_invocations(int x, const char* str) {
// CHECK: error: the argument to __builtin_amdgcn_processor_is must be a valid AMDGCN processor identifier; 'not_an_amdgcn_gfx_id' is not valid
// CHECK-DAG: note: valid AMDGCN processor identifiers are: {{.*}}gfx{{.*}}
if (__builtin_amdgcn_processor_is("not_an_amdgcn_gfx_id")) return;
// CHECK: error: the argument to __builtin_amdgcn_processor_is must be a string literal
if (__builtin_amdgcn_processor_is(str)) return;
// CHECK: error: the argument to __builtin_amdgcn_is_invocable must be either a target agnostic builtin or an AMDGCN target specific builtin; {{.*}}__builtin_amdgcn_s_sleep_var{{.*}} is not valid
if (__builtin_amdgcn_is_invocable("__builtin_amdgcn_s_sleep_var")) return;
// CHECK: error: the argument to __builtin_amdgcn_is_invocable must be either a target agnostic builtin or an AMDGCN target specific builtin; {{.*}}str{{.*}} is not valid
else if (__builtin_amdgcn_is_invocable(str)) return;
// CHECK: error: the argument to __builtin_amdgcn_is_invocable must be either a target agnostic builtin or an AMDGCN target specific builtin; {{.*}}x{{.*}} is not valid
else if (__builtin_amdgcn_is_invocable(x)) return;
// CHECK: error: use of undeclared identifier '__builtin_ia32_pause'
else if (__builtin_amdgcn_is_invocable(__builtin_ia32_pause)) return;
}
bool return_needs_cast() {
// CHECK: error: '__builtin_amdgcn_processor_is("gfx900")' must be explicitly cast to 'bool'; however, please note that this is almost always an error and that it prevents the effective guarding of target dependent code, and thus should be avoided
return __builtin_amdgcn_processor_is("gfx900");
}

View File

@ -5,7 +5,7 @@
// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -x hip \
// RUN: -aux-triple x86_64-pc-windows-msvc -fcuda-is-device -emit-llvm %s \
// RUN: -o - | FileCheck %s
// RUN: -o - | FileCheck %s --check-prefix=AMDGCNSPIRV
#include "Inputs/cuda.h"
@ -28,6 +28,25 @@
// CHECK-NEXT: store i32 [[TMP2]], ptr addrspace(4) [[TMP3]], align 4
// CHECK-NEXT: ret void
//
// AMDGCNSPIRV-LABEL: @_Z16use_dispatch_ptrPi(
// AMDGCNSPIRV-NEXT: entry:
// AMDGCNSPIRV-NEXT: [[OUT:%.*]] = alloca ptr addrspace(4), align 8
// AMDGCNSPIRV-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(4), align 8
// AMDGCNSPIRV-NEXT: [[DISPATCH_PTR:%.*]] = alloca ptr addrspace(4), align 8
// AMDGCNSPIRV-NEXT: [[OUT_ASCAST:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(4)
// AMDGCNSPIRV-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr [[OUT_ADDR]] to ptr addrspace(4)
// AMDGCNSPIRV-NEXT: [[DISPATCH_PTR_ASCAST:%.*]] = addrspacecast ptr [[DISPATCH_PTR]] to ptr addrspace(4)
// AMDGCNSPIRV-NEXT: store ptr addrspace(1) [[OUT_COERCE:%.*]], ptr addrspace(4) [[OUT_ASCAST]], align 8
// AMDGCNSPIRV-NEXT: [[OUT1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[OUT_ASCAST]], align 8
// AMDGCNSPIRV-NEXT: store ptr addrspace(4) [[OUT1]], ptr addrspace(4) [[OUT_ADDR_ASCAST]], align 8
// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = call addrspace(4) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
// AMDGCNSPIRV-NEXT: store ptr addrspace(4) [[TMP0]], ptr addrspace(4) [[DISPATCH_PTR_ASCAST]], align 8
// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[DISPATCH_PTR_ASCAST]], align 8
// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4
// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[OUT_ADDR_ASCAST]], align 8
// AMDGCNSPIRV-NEXT: store i32 [[TMP2]], ptr addrspace(4) [[TMP3]], align 4
// AMDGCNSPIRV-NEXT: ret void
//
__global__ void use_dispatch_ptr(int* out) {
const int* dispatch_ptr = (const int*)__builtin_amdgcn_dispatch_ptr();
*out = *dispatch_ptr;
@ -52,6 +71,25 @@ __global__ void use_dispatch_ptr(int* out) {
// CHECK-NEXT: store i32 [[TMP2]], ptr addrspace(4) [[TMP3]], align 4
// CHECK-NEXT: ret void
//
// AMDGCNSPIRV-LABEL: @_Z13use_queue_ptrPi(
// AMDGCNSPIRV-NEXT: entry:
// AMDGCNSPIRV-NEXT: [[OUT:%.*]] = alloca ptr addrspace(4), align 8
// AMDGCNSPIRV-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(4), align 8
// AMDGCNSPIRV-NEXT: [[QUEUE_PTR:%.*]] = alloca ptr addrspace(4), align 8
// AMDGCNSPIRV-NEXT: [[OUT_ASCAST:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(4)
// AMDGCNSPIRV-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr [[OUT_ADDR]] to ptr addrspace(4)
// AMDGCNSPIRV-NEXT: [[QUEUE_PTR_ASCAST:%.*]] = addrspacecast ptr [[QUEUE_PTR]] to ptr addrspace(4)
// AMDGCNSPIRV-NEXT: store ptr addrspace(1) [[OUT_COERCE:%.*]], ptr addrspace(4) [[OUT_ASCAST]], align 8
// AMDGCNSPIRV-NEXT: [[OUT1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[OUT_ASCAST]], align 8
// AMDGCNSPIRV-NEXT: store ptr addrspace(4) [[OUT1]], ptr addrspace(4) [[OUT_ADDR_ASCAST]], align 8
// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = call addrspace(4) ptr addrspace(4) @llvm.amdgcn.queue.ptr()
// AMDGCNSPIRV-NEXT: store ptr addrspace(4) [[TMP0]], ptr addrspace(4) [[QUEUE_PTR_ASCAST]], align 8
// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[QUEUE_PTR_ASCAST]], align 8
// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4
// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[OUT_ADDR_ASCAST]], align 8
// AMDGCNSPIRV-NEXT: store i32 [[TMP2]], ptr addrspace(4) [[TMP3]], align 4
// AMDGCNSPIRV-NEXT: ret void
//
__global__ void use_queue_ptr(int* out) {
const int* queue_ptr = (const int*)__builtin_amdgcn_queue_ptr();
*out = *queue_ptr;
@ -76,14 +114,30 @@ __global__ void use_queue_ptr(int* out) {
// CHECK-NEXT: store i32 [[TMP2]], ptr addrspace(4) [[TMP3]], align 4
// CHECK-NEXT: ret void
//
// AMDGCNSPIRV-LABEL: @_Z19use_implicitarg_ptrPi(
// AMDGCNSPIRV-NEXT: entry:
// AMDGCNSPIRV-NEXT: [[OUT:%.*]] = alloca ptr addrspace(4), align 8
// AMDGCNSPIRV-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(4), align 8
// AMDGCNSPIRV-NEXT: [[IMPLICITARG_PTR:%.*]] = alloca ptr addrspace(4), align 8
// AMDGCNSPIRV-NEXT: [[OUT_ASCAST:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(4)
// AMDGCNSPIRV-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr [[OUT_ADDR]] to ptr addrspace(4)
// AMDGCNSPIRV-NEXT: [[IMPLICITARG_PTR_ASCAST:%.*]] = addrspacecast ptr [[IMPLICITARG_PTR]] to ptr addrspace(4)
// AMDGCNSPIRV-NEXT: store ptr addrspace(1) [[OUT_COERCE:%.*]], ptr addrspace(4) [[OUT_ASCAST]], align 8
// AMDGCNSPIRV-NEXT: [[OUT1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[OUT_ASCAST]], align 8
// AMDGCNSPIRV-NEXT: store ptr addrspace(4) [[OUT1]], ptr addrspace(4) [[OUT_ADDR_ASCAST]], align 8
// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = call addrspace(4) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
// AMDGCNSPIRV-NEXT: store ptr addrspace(4) [[TMP0]], ptr addrspace(4) [[IMPLICITARG_PTR_ASCAST]], align 8
// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[IMPLICITARG_PTR_ASCAST]], align 8
// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4
// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[OUT_ADDR_ASCAST]], align 8
// AMDGCNSPIRV-NEXT: store i32 [[TMP2]], ptr addrspace(4) [[TMP3]], align 4
// AMDGCNSPIRV-NEXT: ret void
//
__global__ void use_implicitarg_ptr(int* out) {
const int* implicitarg_ptr = (const int*)__builtin_amdgcn_implicitarg_ptr();
*out = *implicitarg_ptr;
}
__global__
//
void
// CHECK-LABEL: @_Z12test_ds_fmaxf(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[SRC_ADDR:%.*]] = alloca float, align 4
@ -96,7 +150,21 @@ __global__
// CHECK-NEXT: store volatile float [[TMP1]], ptr addrspace(4) [[X_ASCAST]], align 4
// CHECK-NEXT: ret void
//
test_ds_fmax(float src) {
// AMDGCNSPIRV-LABEL: @_Z12test_ds_fmaxf(
// AMDGCNSPIRV-NEXT: entry:
// AMDGCNSPIRV-NEXT: [[SRC_ADDR:%.*]] = alloca float, align 4
// AMDGCNSPIRV-NEXT: [[X:%.*]] = alloca float, align 4
// AMDGCNSPIRV-NEXT: [[SRC_ADDR_ASCAST:%.*]] = addrspacecast ptr [[SRC_ADDR]] to ptr addrspace(4)
// AMDGCNSPIRV-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr [[X]] to ptr addrspace(4)
// AMDGCNSPIRV-NEXT: store float [[SRC:%.*]], ptr addrspace(4) [[SRC_ADDR_ASCAST]], align 4
// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(4) [[SRC_ADDR_ASCAST]], align 4
// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw fmax ptr addrspace(3) @_ZZ12test_ds_fmaxfE6shared, float [[TMP0]] monotonic, align 4
// AMDGCNSPIRV-NEXT: store volatile float [[TMP1]], ptr addrspace(4) [[X_ASCAST]], align 4
// AMDGCNSPIRV-NEXT: ret void
//
__global__ void test_ds_fmax(float src) {
//
//
__shared__ float shared;
volatile float x = __builtin_amdgcn_ds_fmaxf(&shared, src, 0, 0, false);
}
@ -113,6 +181,18 @@ __global__
// CHECK-NEXT: store volatile float [[TMP1]], ptr addrspace(4) [[X_ASCAST]], align 4
// CHECK-NEXT: ret void
//
// AMDGCNSPIRV-LABEL: @_Z12test_ds_faddf(
// AMDGCNSPIRV-NEXT: entry:
// AMDGCNSPIRV-NEXT: [[SRC_ADDR:%.*]] = alloca float, align 4
// AMDGCNSPIRV-NEXT: [[X:%.*]] = alloca float, align 4
// AMDGCNSPIRV-NEXT: [[SRC_ADDR_ASCAST:%.*]] = addrspacecast ptr [[SRC_ADDR]] to ptr addrspace(4)
// AMDGCNSPIRV-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr [[X]] to ptr addrspace(4)
// AMDGCNSPIRV-NEXT: store float [[SRC:%.*]], ptr addrspace(4) [[SRC_ADDR_ASCAST]], align 4
// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(4) [[SRC_ADDR_ASCAST]], align 4
// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw fadd ptr addrspace(3) @_ZZ12test_ds_faddfE6shared, float [[TMP0]] monotonic, align 4
// AMDGCNSPIRV-NEXT: store volatile float [[TMP1]], ptr addrspace(4) [[X_ASCAST]], align 4
// AMDGCNSPIRV-NEXT: ret void
//
__global__ void test_ds_fadd(float src) {
__shared__ float shared;
volatile float x = __builtin_amdgcn_ds_faddf(&shared, src, 0, 0, false);
@ -139,6 +219,27 @@ __global__ void test_ds_fadd(float src) {
// CHECK-NEXT: store volatile float [[TMP3]], ptr addrspace(4) [[X_ASCAST]], align 4
// CHECK-NEXT: ret void
//
// AMDGCNSPIRV-LABEL: @_Z12test_ds_fminfPf(
// AMDGCNSPIRV-NEXT: entry:
// AMDGCNSPIRV-NEXT: [[SHARED:%.*]] = alloca ptr addrspace(4), align 8
// AMDGCNSPIRV-NEXT: [[SRC_ADDR:%.*]] = alloca float, align 4
// AMDGCNSPIRV-NEXT: [[SHARED_ADDR:%.*]] = alloca ptr addrspace(4), align 8
// AMDGCNSPIRV-NEXT: [[X:%.*]] = alloca float, align 4
// AMDGCNSPIRV-NEXT: [[SHARED_ASCAST:%.*]] = addrspacecast ptr [[SHARED]] to ptr addrspace(4)
// AMDGCNSPIRV-NEXT: [[SRC_ADDR_ASCAST:%.*]] = addrspacecast ptr [[SRC_ADDR]] to ptr addrspace(4)
// AMDGCNSPIRV-NEXT: [[SHARED_ADDR_ASCAST:%.*]] = addrspacecast ptr [[SHARED_ADDR]] to ptr addrspace(4)
// AMDGCNSPIRV-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr [[X]] to ptr addrspace(4)
// AMDGCNSPIRV-NEXT: store ptr addrspace(1) [[SHARED_COERCE:%.*]], ptr addrspace(4) [[SHARED_ASCAST]], align 8
// AMDGCNSPIRV-NEXT: [[SHARED1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[SHARED_ASCAST]], align 8
// AMDGCNSPIRV-NEXT: store float [[SRC:%.*]], ptr addrspace(4) [[SRC_ADDR_ASCAST]], align 4
// AMDGCNSPIRV-NEXT: store ptr addrspace(4) [[SHARED1]], ptr addrspace(4) [[SHARED_ADDR_ASCAST]], align 8
// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[SHARED_ADDR_ASCAST]], align 8
// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[TMP0]] to ptr addrspace(3)
// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(4) [[SRC_ADDR_ASCAST]], align 4
// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw fmin ptr addrspace(3) [[TMP1]], float [[TMP2]] monotonic, align 4
// AMDGCNSPIRV-NEXT: store volatile float [[TMP3]], ptr addrspace(4) [[X_ASCAST]], align 4
// AMDGCNSPIRV-NEXT: ret void
//
__global__ void test_ds_fmin(float src, float *shared) {
volatile float x = __builtin_amdgcn_ds_fminf(shared, src, 0, 0, false);
}
@ -155,6 +256,11 @@ __device__ void test_ret_builtin_nondef_addrspace() {
// CHECK-NEXT: call addrspace(4) void @llvm.amdgcn.endpgm()
// CHECK-NEXT: ret void
//
// AMDGCNSPIRV-LABEL: @_Z6endpgmv(
// AMDGCNSPIRV-NEXT: entry:
// AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.amdgcn.endpgm()
// AMDGCNSPIRV-NEXT: ret void
//
__global__ void endpgm() {
__builtin_amdgcn_endpgm();
}
@ -183,6 +289,28 @@ __global__ void endpgm() {
// CHECK-NEXT: store i64 [[TMP2]], ptr addrspace(4) [[TMP3]], align 8
// CHECK-NEXT: ret void
//
// AMDGCNSPIRV-LABEL: @_Z14test_uicmp_i64Pyyy(
// AMDGCNSPIRV-NEXT: entry:
// AMDGCNSPIRV-NEXT: [[OUT:%.*]] = alloca ptr addrspace(4), align 8
// AMDGCNSPIRV-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(4), align 8
// AMDGCNSPIRV-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// AMDGCNSPIRV-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8
// AMDGCNSPIRV-NEXT: [[OUT_ASCAST:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(4)
// AMDGCNSPIRV-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr [[OUT_ADDR]] to ptr addrspace(4)
// AMDGCNSPIRV-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(4)
// AMDGCNSPIRV-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr [[B_ADDR]] to ptr addrspace(4)
// AMDGCNSPIRV-NEXT: store ptr addrspace(1) [[OUT_COERCE:%.*]], ptr addrspace(4) [[OUT_ASCAST]], align 8
// AMDGCNSPIRV-NEXT: [[OUT1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[OUT_ASCAST]], align 8
// AMDGCNSPIRV-NEXT: store ptr addrspace(4) [[OUT1]], ptr addrspace(4) [[OUT_ADDR_ASCAST]], align 8
// AMDGCNSPIRV-NEXT: store i64 [[A:%.*]], ptr addrspace(4) [[A_ADDR_ASCAST]], align 8
// AMDGCNSPIRV-NEXT: store i64 [[B:%.*]], ptr addrspace(4) [[B_ADDR_ASCAST]], align 8
// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) [[A_ADDR_ASCAST]], align 8
// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(4) [[B_ADDR_ASCAST]], align 8
// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = call addrspace(4) i64 @llvm.amdgcn.icmp.i64.i64(i64 [[TMP0]], i64 [[TMP1]], i32 35)
// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[OUT_ADDR_ASCAST]], align 8
// AMDGCNSPIRV-NEXT: store i64 [[TMP2]], ptr addrspace(4) [[TMP3]], align 8
// AMDGCNSPIRV-NEXT: ret void
//
__global__ void test_uicmp_i64(unsigned long long *out, unsigned long long a, unsigned long long b)
{
*out = __builtin_amdgcn_uicmpl(a, b, 30+5);
@ -199,14 +327,39 @@ __global__ void test_uicmp_i64(unsigned long long *out, unsigned long long a, un
// CHECK-NEXT: store ptr addrspace(1) [[OUT_COERCE:%.*]], ptr addrspace(4) [[OUT_ASCAST]], align 8
// CHECK-NEXT: [[OUT1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[OUT_ASCAST]], align 8
// CHECK-NEXT: store ptr addrspace(4) [[OUT1]], ptr addrspace(4) [[OUT_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = call addrspace(4) i64 @llvm.amdgcn.s.memtime()
// CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[OUT_ADDR_ASCAST]], align 8
// CHECK-NEXT: store i64 [[TMP0]], ptr addrspace(4) [[TMP1]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata [[META5:![0-9]+]])
// CHECK-NEXT: br i1 [[TMP0]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
// CHECK: if.then:
// CHECK-NEXT: [[TMP1:%.*]] = call addrspace(4) i64 @llvm.amdgcn.s.memtime()
// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[OUT_ADDR_ASCAST]], align 8
// CHECK-NEXT: store i64 [[TMP1]], ptr addrspace(4) [[TMP2]], align 8
// CHECK-NEXT: br label [[IF_END]]
// CHECK: if.end:
// CHECK-NEXT: ret void
//
// AMDGCNSPIRV-LABEL: @_Z14test_s_memtimePy(
// AMDGCNSPIRV-NEXT: entry:
// AMDGCNSPIRV-NEXT: [[OUT:%.*]] = alloca ptr addrspace(4), align 8
// AMDGCNSPIRV-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(4), align 8
// AMDGCNSPIRV-NEXT: [[OUT_ASCAST:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(4)
// AMDGCNSPIRV-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr [[OUT_ADDR]] to ptr addrspace(4)
// AMDGCNSPIRV-NEXT: store ptr addrspace(1) [[OUT_COERCE:%.*]], ptr addrspace(4) [[OUT_ASCAST]], align 8
// AMDGCNSPIRV-NEXT: [[OUT1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[OUT_ASCAST]], align 8
// AMDGCNSPIRV-NEXT: store ptr addrspace(4) [[OUT1]], ptr addrspace(4) [[OUT_ADDR_ASCAST]], align 8
// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata [[META7:![0-9]+]])
// AMDGCNSPIRV-NEXT: br i1 [[TMP0]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
// AMDGCNSPIRV: if.then:
// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = call addrspace(4) i64 @llvm.amdgcn.s.memtime()
// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[OUT_ADDR_ASCAST]], align 8
// AMDGCNSPIRV-NEXT: store i64 [[TMP1]], ptr addrspace(4) [[TMP2]], align 8
// AMDGCNSPIRV-NEXT: br label [[IF_END]]
// AMDGCNSPIRV: if.end:
// AMDGCNSPIRV-NEXT: ret void
//
__global__ void test_s_memtime(unsigned long long* out)
{
*out = __builtin_amdgcn_s_memtime();
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_memtime))
*out = __builtin_amdgcn_s_memtime();
}
// Check a generic pointer can be passed as a shared pointer and a generic pointer.
@ -232,9 +385,32 @@ __device__ void func(float *x);
// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw fmin ptr addrspace(3) [[TMP1]], float [[TMP2]] monotonic, align 4
// CHECK-NEXT: store volatile float [[TMP3]], ptr addrspace(4) [[X_ASCAST]], align 4
// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[SHARED_ADDR_ASCAST]], align 8
// CHECK-NEXT: call spir_func addrspace(4) void @_Z4funcPf(ptr addrspace(4) noundef [[TMP4]]) #[[ATTR7:[0-9]+]]
// CHECK-NEXT: call spir_func addrspace(4) void @_Z4funcPf(ptr addrspace(4) noundef [[TMP4]]) #[[ATTR8:[0-9]+]]
// CHECK-NEXT: ret void
//
// AMDGCNSPIRV-LABEL: @_Z17test_ds_fmin_funcfPf(
// AMDGCNSPIRV-NEXT: entry:
// AMDGCNSPIRV-NEXT: [[SHARED:%.*]] = alloca ptr addrspace(4), align 8
// AMDGCNSPIRV-NEXT: [[SRC_ADDR:%.*]] = alloca float, align 4
// AMDGCNSPIRV-NEXT: [[SHARED_ADDR:%.*]] = alloca ptr addrspace(4), align 8
// AMDGCNSPIRV-NEXT: [[X:%.*]] = alloca float, align 4
// AMDGCNSPIRV-NEXT: [[SHARED_ASCAST:%.*]] = addrspacecast ptr [[SHARED]] to ptr addrspace(4)
// AMDGCNSPIRV-NEXT: [[SRC_ADDR_ASCAST:%.*]] = addrspacecast ptr [[SRC_ADDR]] to ptr addrspace(4)
// AMDGCNSPIRV-NEXT: [[SHARED_ADDR_ASCAST:%.*]] = addrspacecast ptr [[SHARED_ADDR]] to ptr addrspace(4)
// AMDGCNSPIRV-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr [[X]] to ptr addrspace(4)
// AMDGCNSPIRV-NEXT: store ptr addrspace(1) [[SHARED_COERCE:%.*]], ptr addrspace(4) [[SHARED_ASCAST]], align 8
// AMDGCNSPIRV-NEXT: [[SHARED1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[SHARED_ASCAST]], align 8
// AMDGCNSPIRV-NEXT: store float [[SRC:%.*]], ptr addrspace(4) [[SRC_ADDR_ASCAST]], align 4
// AMDGCNSPIRV-NEXT: store ptr addrspace(4) [[SHARED1]], ptr addrspace(4) [[SHARED_ADDR_ASCAST]], align 8
// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[SHARED_ADDR_ASCAST]], align 8
// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[TMP0]] to ptr addrspace(3)
// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(4) [[SRC_ADDR_ASCAST]], align 4
// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw fmin ptr addrspace(3) [[TMP1]], float [[TMP2]] monotonic, align 4
// AMDGCNSPIRV-NEXT: store volatile float [[TMP3]], ptr addrspace(4) [[X_ASCAST]], align 4
// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[SHARED_ADDR_ASCAST]], align 8
// AMDGCNSPIRV-NEXT: call spir_func addrspace(4) void @_Z4funcPf(ptr addrspace(4) noundef [[TMP4]]) #[[ATTR8:[0-9]+]]
// AMDGCNSPIRV-NEXT: ret void
//
__global__ void test_ds_fmin_func(float src, float *__restrict shared) {
volatile float x = __builtin_amdgcn_ds_fminf(shared, src, 0, 0, false);
func(shared);
@ -258,6 +434,24 @@ __global__ void test_ds_fmin_func(float src, float *__restrict shared) {
// CHECK-NEXT: store i8 [[STOREDV]], ptr addrspace(4) [[RET_ASCAST]], align 1
// CHECK-NEXT: ret void
//
// AMDGCNSPIRV-LABEL: @_Z14test_is_sharedPf(
// AMDGCNSPIRV-NEXT: entry:
// AMDGCNSPIRV-NEXT: [[X:%.*]] = alloca ptr addrspace(4), align 8
// AMDGCNSPIRV-NEXT: [[X_ADDR:%.*]] = alloca ptr addrspace(4), align 8
// AMDGCNSPIRV-NEXT: [[RET:%.*]] = alloca i8, align 1
// AMDGCNSPIRV-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr [[X]] to ptr addrspace(4)
// AMDGCNSPIRV-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr [[X_ADDR]] to ptr addrspace(4)
// AMDGCNSPIRV-NEXT: [[RET_ASCAST:%.*]] = addrspacecast ptr [[RET]] to ptr addrspace(4)
// AMDGCNSPIRV-NEXT: store ptr addrspace(1) [[X_COERCE:%.*]], ptr addrspace(4) [[X_ASCAST]], align 8
// AMDGCNSPIRV-NEXT: [[X1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X_ASCAST]], align 8
// AMDGCNSPIRV-NEXT: store ptr addrspace(4) [[X1]], ptr addrspace(4) [[X_ADDR_ASCAST]], align 8
// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X_ADDR_ASCAST]], align 8
// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[TMP0]] to ptr
// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = call addrspace(4) i1 @llvm.amdgcn.is.shared(ptr [[TMP1]])
// AMDGCNSPIRV-NEXT: [[STOREDV:%.*]] = zext i1 [[TMP2]] to i8
// AMDGCNSPIRV-NEXT: store i8 [[STOREDV]], ptr addrspace(4) [[RET_ASCAST]], align 1
// AMDGCNSPIRV-NEXT: ret void
//
__global__ void test_is_shared(float *x){
bool ret = __builtin_amdgcn_is_shared(x);
}
@ -280,6 +474,24 @@ __global__ void test_is_shared(float *x){
// CHECK-NEXT: store i8 [[STOREDV]], ptr addrspace(4) [[RET_ASCAST]], align 1
// CHECK-NEXT: ret void
//
// AMDGCNSPIRV-LABEL: @_Z15test_is_privatePi(
// AMDGCNSPIRV-NEXT: entry:
// AMDGCNSPIRV-NEXT: [[X:%.*]] = alloca ptr addrspace(4), align 8
// AMDGCNSPIRV-NEXT: [[X_ADDR:%.*]] = alloca ptr addrspace(4), align 8
// AMDGCNSPIRV-NEXT: [[RET:%.*]] = alloca i8, align 1
// AMDGCNSPIRV-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr [[X]] to ptr addrspace(4)
// AMDGCNSPIRV-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr [[X_ADDR]] to ptr addrspace(4)
// AMDGCNSPIRV-NEXT: [[RET_ASCAST:%.*]] = addrspacecast ptr [[RET]] to ptr addrspace(4)
// AMDGCNSPIRV-NEXT: store ptr addrspace(1) [[X_COERCE:%.*]], ptr addrspace(4) [[X_ASCAST]], align 8
// AMDGCNSPIRV-NEXT: [[X1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X_ASCAST]], align 8
// AMDGCNSPIRV-NEXT: store ptr addrspace(4) [[X1]], ptr addrspace(4) [[X_ADDR_ASCAST]], align 8
// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X_ADDR_ASCAST]], align 8
// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[TMP0]] to ptr
// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = call addrspace(4) i1 @llvm.amdgcn.is.private(ptr [[TMP1]])
// AMDGCNSPIRV-NEXT: [[STOREDV:%.*]] = zext i1 [[TMP2]] to i8
// AMDGCNSPIRV-NEXT: store i8 [[STOREDV]], ptr addrspace(4) [[RET_ASCAST]], align 1
// AMDGCNSPIRV-NEXT: ret void
//
__global__ void test_is_private(int *x){
bool ret = __builtin_amdgcn_is_private(x);
}

View File

@ -21,26 +21,30 @@ constexpr static bool BountCtrl()
return true & false;
}
// CHECK: call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %0, i32 %1, i32 16, i32 0, i32 0, i1 false)
// CHECK: call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %2, i32 16, i32 0, i32 0, i1 false)
__attribute__((global)) void test_update_dpp_const_fold_imm_operand_2(int* out, int a, int b)
{
*out = __builtin_amdgcn_update_dpp(a, b, OpCtrl(), 0, 0, false);
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_update_dpp))
*out = __builtin_amdgcn_update_dpp(a, b, OpCtrl(), 0, 0, false);
}
// CHECK: call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %0, i32 %1, i32 0, i32 4, i32 0, i1 false)
// CHECK: call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %2, i32 0, i32 4, i32 0, i1 false)
__attribute__((global)) void test_update_dpp_const_fold_imm_operand_3(int* out, int a, int b)
{
*out = __builtin_amdgcn_update_dpp(a, b, 0, RowMask(), 0, false);
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_update_dpp))
*out = __builtin_amdgcn_update_dpp(a, b, 0, RowMask(), 0, false);
}
// CHECK: call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %0, i32 %1, i32 0, i32 0, i32 3, i1 false)
// CHECK: call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %2, i32 0, i32 0, i32 3, i1 false)
__attribute__((global)) void test_update_dpp_const_fold_imm_operand_4(int* out, int a, int b)
{
*out = __builtin_amdgcn_update_dpp(a, b, 0, 0, BankMask(), false);
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_update_dpp))
*out = __builtin_amdgcn_update_dpp(a, b, 0, 0, BankMask(), false);
}
// CHECK: call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %0, i32 %1, i32 0, i32 0, i32 0, i1 false)
// CHECK: call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %2, i32 0, i32 0, i32 0, i1 false)
__attribute__((global)) void test_update_dpp_const_fold_imm_operand_5(int* out, int a, int b)
{
*out = __builtin_amdgcn_update_dpp(a, b, 0, 0, 0, BountCtrl());
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_update_dpp))
*out = __builtin_amdgcn_update_dpp(a, b, 0, 0, 0, BountCtrl());
}

View File

@ -12,78 +12,88 @@ typedef unsigned long ulong;
// CHECK-LABEL: @test_permlane16(
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.permlane16.i32(i32 %a, i32 %b, i32 %c, i32 %d, i1 false, i1 false)
void test_permlane16(global uint* out, uint a, uint b, uint c, uint d) {
*out = __builtin_amdgcn_permlane16(a, b, c, d, 0, 0);
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_permlane16))
*out = __builtin_amdgcn_permlane16(a, b, c, d, 0, 0);
}
// CHECK-LABEL: @test_permlanex16(
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.permlanex16.i32(i32 %a, i32 %b, i32 %c, i32 %d, i1 false, i1 false)
void test_permlanex16(global uint* out, uint a, uint b, uint c, uint d) {
*out = __builtin_amdgcn_permlanex16(a, b, c, d, 0, 0);
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_permlanex16))
*out = __builtin_amdgcn_permlanex16(a, b, c, d, 0, 0);
}
// CHECK-LABEL: @test_mov_dpp8_uint(
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.mov.dpp8.i32(i32 %a, i32 1)
// CHECK-NEXT: store i32 %0,
// CHECK-NEXT: store i32 %[[#]],
void test_mov_dpp8_uint(global uint* out, uint a) {
*out = __builtin_amdgcn_mov_dpp8(a, 1);
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_mov_dpp8))
*out = __builtin_amdgcn_mov_dpp8(a, 1);
}
// CHECK-LABEL: @test_mov_dpp8_long(
// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.mov.dpp8.i64(i64 %a, i32 1)
// CHECK-NEXT: store i64 %0,
// CHECK-NEXT: store i64 %[[#]],
void test_mov_dpp8_long(global long* out, long a) {
*out = __builtin_amdgcn_mov_dpp8(a, 1);
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_mov_dpp8))
*out = __builtin_amdgcn_mov_dpp8(a, 1);
}
// CHECK-LABEL: @test_mov_dpp8_float(
// CHECK: %0 = bitcast float %a to i32
// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.mov.dpp8.i32(i32 %0, i32 1)
// CHECK-NEXT: store i32 %1,
// CHECK: %[[BC:[0-9]+]] = bitcast float %a to i32
// CHECK-NEXT: %[[DPP_RET:[0-9]+]] = tail call{{.*}} i32 @llvm.amdgcn.mov.dpp8.i32(i32 %[[BC]], i32 1)
// CHECK-NEXT: store i32 %[[DPP_RET]],
void test_mov_dpp8_float(global float* out, float a) {
*out = __builtin_amdgcn_mov_dpp8(a, 1);
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_mov_dpp8))
*out = __builtin_amdgcn_mov_dpp8(a, 1);
}
// CHECK-LABEL: @test_mov_dpp8_double
// CHECK: %0 = bitcast double %x to i64
// CHECK-NEXT: %1 = tail call{{.*}} i64 @llvm.amdgcn.mov.dpp8.i64(i64 %0, i32 1)
// CHECK-NEXT: store i64 %1,
// CHECK: %[[BC:[0-9]+]] = bitcast double %x to i64
// CHECK-NEXT: %[[DPP_RET:[0-9]+]] = tail call{{.*}} i64 @llvm.amdgcn.mov.dpp8.i64(i64 %[[BC]], i32 1)
// CHECK-NEXT: store i64 %[[DPP_RET]],
void test_mov_dpp8_double(double x, global double *p) {
*p = __builtin_amdgcn_mov_dpp8(x, 1);
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_mov_dpp8))
*p = __builtin_amdgcn_mov_dpp8(x, 1);
}
// CHECK-LABEL: @test_mov_dpp8_short
// CHECK: %0 = zext i16 %x to i32
// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.mov.dpp8.i32(i32 %0, i32 1)
// CHECK-NEXT: %2 = trunc i32 %1 to i16
// CHECK-NEXT: store i16 %2,
// CHECK: %[[ZEXT:[0-9]+]] = zext i16 %x to i32
// CHECK-NEXT: %[[DPP_RET:[0-9]+]] = tail call{{.*}} i32 @llvm.amdgcn.mov.dpp8.i32(i32 %[[ZEXT]], i32 1)
// CHECK-NEXT: %[[TRUNC:[0-9]+]] = trunc i32 %[[DPP_RET]] to i16
// CHECK-NEXT: store i16 %[[TRUNC]],
void test_mov_dpp8_short(short x, global short *p) {
*p = __builtin_amdgcn_mov_dpp8(x, 1);
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_mov_dpp8))
*p = __builtin_amdgcn_mov_dpp8(x, 1);
}
// CHECK-LABEL: @test_mov_dpp8_char
// CHECK: %0 = zext i8 %x to i32
// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.mov.dpp8.i32(i32 %0, i32 1)
// CHECK-NEXT: %2 = trunc i32 %1 to i8
// CHECK-NEXT: store i8 %2,
// CHECK: %[[ZEXT:[0-9]+]] = zext i8 %x to i32
// CHECK-NEXT: %[[DPP_RET:[0-9]+]] = tail call{{.*}} i32 @llvm.amdgcn.mov.dpp8.i32(i32 %[[ZEXT]], i32 1)
// CHECK-NEXT: %[[TRUNC:[0-9]+]] = trunc i32 %[[DPP_RET]] to i8
// CHECK-NEXT: store i8 %[[TRUNC]],
void test_mov_dpp8_char(char x, global char *p) {
*p = __builtin_amdgcn_mov_dpp8(x, 1);
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_mov_dpp8))
*p = __builtin_amdgcn_mov_dpp8(x, 1);
}
// CHECK-LABEL: @test_mov_dpp8_half
// CHECK: %0 = load i16,
// CHECK: %1 = zext i16 %0 to i32
// CHECK-NEXT: %2 = tail call{{.*}} i32 @llvm.amdgcn.mov.dpp8.i32(i32 %1, i32 1)
// CHECK-NEXT: %3 = trunc i32 %2 to i16
// CHECK-NEXT: store i16 %3,
// CHECK: %[[LD:[0-9]+]] = load i16,
// CHECK: %[[ZEXT:[0-9]+]] = zext i16 %[[LD]] to i32
// CHECK-NEXT: %[[DPP_RET:[0-9]+]] = tail call{{.*}} i32 @llvm.amdgcn.mov.dpp8.i32(i32 %[[ZEXT]], i32 1)
// CHECK-NEXT: %[[TRUNC:[0-9]+]] = trunc i32 %[[DPP_RET]] to i16
// CHECK-NEXT: store i16 %[[TRUNC]],
void test_mov_dpp8_half(half *x, global half *p) {
*p = __builtin_amdgcn_mov_dpp8(*x, 1);
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_mov_dpp8))
*p = __builtin_amdgcn_mov_dpp8(*x, 1);
}
// CHECK-LABEL: @test_s_memtime
// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.s.memtime()
void test_s_memtime(global ulong* out)
{
*out = __builtin_amdgcn_s_memtime();
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_memtime))
*out = __builtin_amdgcn_s_memtime();
}
// CHECK-LABEL: @test_groupstaticsize
@ -97,5 +107,6 @@ void test_groupstaticsize(global uint* out)
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.ballot.i32(i1 %{{.+}})
void test_ballot_wave32(global uint* out, int a, int b)
{
*out = __builtin_amdgcn_ballot_w32(a == b);
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_ballot_w32))
*out = __builtin_amdgcn_ballot_w32(a == b);
}

View File

@ -18,36 +18,41 @@ typedef uint uint4 __attribute__((ext_vector_type(4)));
// CHECK-LABEL: @test_s_sendmsg_rtn(
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.s.sendmsg.rtn.i32(i32 0)
void test_s_sendmsg_rtn(global uint* out) {
*out = __builtin_amdgcn_s_sendmsg_rtn(0);
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_sendmsg_rtn))
*out = __builtin_amdgcn_s_sendmsg_rtn(0);
}
// CHECK-LABEL: @test_s_sendmsg_rtnl(
// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.s.sendmsg.rtn.i64(i32 0)
void test_s_sendmsg_rtnl(global ulong* out) {
*out = __builtin_amdgcn_s_sendmsg_rtnl(0);
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_sendmsg_rtnl))
*out = __builtin_amdgcn_s_sendmsg_rtnl(0);
}
// CHECK-LABEL: @test_ds_bvh_stack_rtn(
// CHECK: %0 = tail call{{.*}} { i32, i32 } @llvm.amdgcn.ds.bvh.stack.rtn(i32 %addr, i32 %data, <4 x i32> %data1, i32 128)
// CHECK: %1 = extractvalue { i32, i32 } %0, 0
// CHECK: %2 = extractvalue { i32, i32 } %0, 1
// CHECK: %3 = insertelement <2 x i32> poison, i32 %1, i64 0
// CHECK: %4 = insertelement <2 x i32> %3, i32 %2, i64 1
// CHECK: %[[#BVH_STACK:]] = tail call{{.*}} { i32, i32 } @llvm.amdgcn.ds.bvh.stack.rtn(i32 %addr, i32 %data, <4 x i32> %data1, i32 128)
// CHECK: %[[#RET_FIRST:]] = extractvalue { i32, i32 } %[[#BVH_STACK]], 0
// CHECK: %[[#RET_SECOND:]] = extractvalue { i32, i32 } %[[#BVH_STACK]], 1
// CHECK: %[[#OUT:]] = insertelement <2 x i32> poison, i32 %[[#RET_FIRST]], i64 0
// CHECK: %{{.*}} = insertelement <2 x i32> %[[#OUT]], i32 %[[#RET_SECOND]], i64 1
void test_ds_bvh_stack_rtn(global uint2* out, uint addr, uint data, uint4 data1)
{
*out = __builtin_amdgcn_ds_bvh_stack_rtn(addr, data, data1, 128);
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_ds_bvh_stack_rtn))
*out = __builtin_amdgcn_ds_bvh_stack_rtn(addr, data, data1, 128);
}
// CHECK-LABEL: @test_permlane64(
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.permlane64.i32(i32 %a)
void test_permlane64(global uint* out, uint a) {
*out = __builtin_amdgcn_permlane64(a);
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_permlane64))
*out = __builtin_amdgcn_permlane64(a);
}
// CHECK-LABEL: @test_s_wait_event_export_ready
// CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.s.wait.event.export.ready
void test_s_wait_event_export_ready() {
__builtin_amdgcn_s_wait_event_export_ready();
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_wait_event_export_ready))
__builtin_amdgcn_s_wait_event_export_ready();
}
// CHECK-LABEL: @test_global_add_f32
@ -58,5 +63,6 @@ void test_global_add_f32(float *rtn, global float *addr, float x) {
#else
void test_global_add_f32(float *rtn, __attribute__((address_space(1))) float *addr, float x) {
#endif
*rtn = __builtin_amdgcn_global_atomic_fadd_f32(addr, x);
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_global_atomic_fadd_f32))
*rtn = __builtin_amdgcn_global_atomic_fadd_f32(addr, x);
}

View File

@ -1,9 +1,9 @@
// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu tonga -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1012 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s
// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,AMDGCNSPIRV %s
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu tonga -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN --enable-var-scope %s
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN --enable-var-scope %s
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN --enable-var-scope %s
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1012 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN --enable-var-scope %s
// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,AMDGCNSPIRV --enable-var-scope %s
#define INVALID_MEMORY_SCOPE (__MEMORY_SCOPE_CLUSTR+1)
@ -16,42 +16,48 @@ typedef unsigned int uint;
// CHECK: {{.*}}call{{.*}} half @llvm.amdgcn.div.fixup.f16
void test_div_fixup_f16(global half* out, half a, half b, half c)
{
*out = __builtin_amdgcn_div_fixuph(a, b, c);
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_div_fixuph))
*out = __builtin_amdgcn_div_fixuph(a, b, c);
}
// CHECK-LABEL: @test_rcp_f16
// CHECK: {{.*}}call{{.*}} half @llvm.amdgcn.rcp.f16
void test_rcp_f16(global half* out, half a)
{
*out = __builtin_amdgcn_rcph(a);
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_rcph))
*out = __builtin_amdgcn_rcph(a);
}
// CHECK-LABEL: @test_sqrt_f16
// CHECK: {{.*}}call{{.*}} half @llvm.{{((amdgcn.){0,1})}}sqrt.f16
void test_sqrt_f16(global half* out, half a)
{
*out = __builtin_amdgcn_sqrth(a);
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_sqrth))
*out = __builtin_amdgcn_sqrth(a);
}
// CHECK-LABEL: @test_rsq_f16
// CHECK: {{.*}}call{{.*}} half @llvm.amdgcn.rsq.f16
void test_rsq_f16(global half* out, half a)
{
*out = __builtin_amdgcn_rsqh(a);
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_rsqh))
*out = __builtin_amdgcn_rsqh(a);
}
// CHECK-LABEL: @test_sin_f16
// CHECK: {{.*}}call{{.*}} half @llvm.amdgcn.sin.f16
void test_sin_f16(global half* out, half a)
{
*out = __builtin_amdgcn_sinh(a);
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_sinh))
*out = __builtin_amdgcn_sinh(a);
}
// CHECK-LABEL: @test_cos_f16
// CHECK: {{.*}}call{{.*}} half @llvm.amdgcn.cos.f16
void test_cos_f16(global half* out, half a)
{
*out = __builtin_amdgcn_cosh(a);
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_cosh))
*out = __builtin_amdgcn_cosh(a);
}
// CHECK-LABEL: @test_ldexp_f16
@ -59,179 +65,202 @@ void test_cos_f16(global half* out, half a)
// CHECK: {{.*}}call{{.*}} half @llvm.ldexp.f16.i16(half %a, i16 [[TRUNC]])
void test_ldexp_f16(global half* out, half a, int b)
{
*out = __builtin_amdgcn_ldexph(a, b);
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_ldexph))
*out = __builtin_amdgcn_ldexph(a, b);
}
// CHECK-LABEL: @test_frexp_mant_f16
// CHECK: {{.*}}call{{.*}} half @llvm.amdgcn.frexp.mant.f16
void test_frexp_mant_f16(global half* out, half a)
{
*out = __builtin_amdgcn_frexp_manth(a);
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_frexp_manth))
*out = __builtin_amdgcn_frexp_manth(a);
}
// CHECK-LABEL: @test_frexp_exp_f16
// CHECK: {{.*}}call{{.*}} i16 @llvm.amdgcn.frexp.exp.i16.f16
void test_frexp_exp_f16(global short* out, half a)
{
*out = __builtin_amdgcn_frexp_exph(a);
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_frexp_exph))
*out = __builtin_amdgcn_frexp_exph(a);
}
// CHECK-LABEL: @test_fract_f16
// CHECK: {{.*}}call{{.*}} half @llvm.amdgcn.fract.f16
void test_fract_f16(global half* out, half a)
{
*out = __builtin_amdgcn_fracth(a);
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_fracth))
*out = __builtin_amdgcn_fracth(a);
}
// CHECK-LABEL: @test_class_f16
// CHECK: {{.*}}call{{.*}} i1 @llvm.amdgcn.class.f16
void test_class_f16(global half* out, half a, int b)
{
*out = __builtin_amdgcn_classh(a, b);
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_classh))
*out = __builtin_amdgcn_classh(a, b);
}
// CHECK-LABEL: @test_s_memrealtime
// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.s.memrealtime()
void test_s_memrealtime(global ulong* out)
{
*out = __builtin_amdgcn_s_memrealtime();
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_memrealtime))
*out = __builtin_amdgcn_s_memrealtime();
}
// CHECK-LABEL: @test_s_dcache_wb()
// CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.s.dcache.wb()
void test_s_dcache_wb()
{
__builtin_amdgcn_s_dcache_wb();
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_dcache_wb))
__builtin_amdgcn_s_dcache_wb();
}
// CHECK-LABEL: @test_mov_dpp_int
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 %src, i32 0, i32 0, i32 0, i1 false)
void test_mov_dpp_int(global int* out, int src)
{
*out = __builtin_amdgcn_mov_dpp(src, 0, 0, 0, false);
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_mov_dpp))
*out = __builtin_amdgcn_mov_dpp(src, 0, 0, 0, false);
}
// CHECK-LABEL: @test_mov_dpp_long
// CHECK: %0 = tail call{{.*}} i64 @llvm.amdgcn.update.dpp.i64(i64 poison, i64 %x, i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: store i64 %0,
// CHECK: %[[DPP_RET:[0-9]+]] = tail call{{.*}} i64 @llvm.amdgcn.update.dpp.i64(i64 poison, i64 %x, i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: store i64 %[[DPP_RET]],
void test_mov_dpp_long(long x, global long *p) {
*p = __builtin_amdgcn_mov_dpp(x, 0x101, 0xf, 0xf, 0);
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_mov_dpp))
*p = __builtin_amdgcn_mov_dpp(x, 0x101, 0xf, 0xf, 0);
}
// CHECK-LABEL: @test_mov_dpp_float
// CHECK: %0 = bitcast float %x to i32
// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 %0, i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: store i32 %1,
// CHECK: %[[BC:[0-9]+]] = bitcast float %x to i32
// CHECK-NEXT: %[[DPP_RET:[0-9]+]] = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 %[[BC]], i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: store i32 %[[DPP_RET]],
void test_mov_dpp_float(float x, global float *p) {
*p = __builtin_amdgcn_mov_dpp(x, 0x101, 0xf, 0xf, 0);
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_mov_dpp))
*p = __builtin_amdgcn_mov_dpp(x, 0x101, 0xf, 0xf, 0);
}
// CHECK-LABEL: @test_mov_dpp_double
// CHECK: %0 = bitcast double %x to i64
// CHECK-NEXT: %1 = tail call{{.*}} i64 @llvm.amdgcn.update.dpp.i64(i64 poison, i64 %0, i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: store i64 %1,
// CHECK: %[[BC:[0-9]+]] = bitcast double %x to i64
// CHECK-NEXT: %[[DPP_RET:[0-9]+]] = tail call{{.*}} i64 @llvm.amdgcn.update.dpp.i64(i64 poison, i64 %[[BC]], i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: store i64 %[[DPP_RET]],
void test_mov_dpp_double(double x, global double *p) {
*p = __builtin_amdgcn_mov_dpp(x, 0x101, 0xf, 0xf, 0);
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_mov_dpp))
*p = __builtin_amdgcn_mov_dpp(x, 0x101, 0xf, 0xf, 0);
}
// CHECK-LABEL: @test_mov_dpp_short
// CHECK: %0 = zext i16 %x to i32
// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 %0, i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: %2 = trunc i32 %1 to i16
// CHECK-NEXT: store i16 %2,
// CHECK: %[[ZEXT:[0-9]+]] = zext i16 %x to i32
// CHECK-NEXT: %[[DPP_RET:[0-9]+]] = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 %[[ZEXT]], i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: %[[TRUNC:[0-9]+]] = trunc i32 %[[DPP_RET]] to i16
// CHECK-NEXT: store i16 %[[TRUNC]],
void test_mov_dpp_short(short x, global short *p) {
*p = __builtin_amdgcn_mov_dpp(x, 0x101, 0xf, 0xf, 0);
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_mov_dpp))
*p = __builtin_amdgcn_mov_dpp(x, 0x101, 0xf, 0xf, 0);
}
// CHECK-LABEL: @test_mov_dpp_char
// CHECK: %0 = zext i8 %x to i32
// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 %0, i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: %2 = trunc i32 %1 to i8
// CHECK-NEXT: store i8 %2,
// CHECK: %[[ZEXT:[0-9]+]] = zext i8 %x to i32
// CHECK-NEXT: %[[DPP_RET:[0-9]+]] = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 %[[ZEXT]], i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: %[[TRUNC:[0-9]+]] = trunc i32 %[[DPP_RET]] to i8
// CHECK-NEXT: store i8 %[[TRUNC]],
void test_mov_dpp_char(char x, global char *p) {
*p = __builtin_amdgcn_mov_dpp(x, 0x101, 0xf, 0xf, 0);
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_mov_dpp))
*p = __builtin_amdgcn_mov_dpp(x, 0x101, 0xf, 0xf, 0);
}
// CHECK-LABEL: @test_mov_dpp_half
// CHECK: %0 = load i16,
// CHECK: %1 = zext i16 %0 to i32
// CHECK-NEXT: %2 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 %1, i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: %3 = trunc i32 %2 to i16
// CHECK-NEXT: store i16 %3,
// CHECK: %[[LD:[0-9]+]] = load i16,
// CHECK: %[[ZEXT:[0-9]+]] = zext i16 %[[LD]] to i32
// CHECK-NEXT: %[[DPP_RET:[0-9]+]] = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 %[[ZEXT]], i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: %[[TRUNC:[0-9]+]] = trunc i32 %[[DPP_RET]] to i16
// CHECK-NEXT: store i16 %[[TRUNC]],
void test_mov_dpp_half(half *x, global half *p) {
*p = __builtin_amdgcn_mov_dpp(*x, 0x101, 0xf, 0xf, 0);
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_mov_dpp))
*p = __builtin_amdgcn_mov_dpp(*x, 0x101, 0xf, 0xf, 0);
}
// CHECK-LABEL: @test_update_dpp_int
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %arg1, i32 %arg2, i32 0, i32 0, i32 0, i1 false)
void test_update_dpp_int(global int* out, int arg1, int arg2)
{
*out = __builtin_amdgcn_update_dpp(arg1, arg2, 0, 0, 0, false);
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_update_dpp))
*out = __builtin_amdgcn_update_dpp(arg1, arg2, 0, 0, 0, false);
}
// CHECK-LABEL: @test_update_dpp_long
// CHECK: %0 = tail call{{.*}} i64 @llvm.amdgcn.update.dpp.i64(i64 %x, i64 %x, i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: store i64 %0,
// CHECK: %[[DPP_RET:[0-9]+]] = tail call{{.*}} i64 @llvm.amdgcn.update.dpp.i64(i64 %x, i64 %x, i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: store i64 %[[DPP_RET]],
void test_update_dpp_long(long x, global long *p) {
*p = __builtin_amdgcn_update_dpp(x, x, 0x101, 0xf, 0xf, 0);
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_update_dpp))
*p = __builtin_amdgcn_update_dpp(x, x, 0x101, 0xf, 0xf, 0);
}
// CHECK-LABEL: @test_update_dpp_float
// CHECK: %0 = bitcast float %x to i32
// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %0, i32 %0, i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: store i32 %1,
// CHECK: %[[BC:[0-9]+]] = bitcast float %x to i32
// CHECK-NEXT: %[[DPP_RET:[0-9]+]] = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %[[BC]], i32 %[[BC]], i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: store i32 %[[DPP_RET]],
void test_update_dpp_float(float x, global float *p) {
*p = __builtin_amdgcn_update_dpp(x, x, 0x101, 0xf, 0xf, 0);
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_update_dpp))
*p = __builtin_amdgcn_update_dpp(x, x, 0x101, 0xf, 0xf, 0);
}
// CHECK-LABEL: @test_update_dpp_double
// CHECK: %0 = bitcast double %x to i64
// CHECK-NEXT: %1 = tail call{{.*}} i64 @llvm.amdgcn.update.dpp.i64(i64 %0, i64 %0, i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: store i64 %1,
// CHECK: %[[BC:[0-9]+]] = bitcast double %x to i64
// CHECK-NEXT: %[[DPP_RET:[0-9]+]] = tail call{{.*}} i64 @llvm.amdgcn.update.dpp.i64(i64 %[[BC]], i64 %[[BC]], i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: store i64 %[[DPP_RET]],
void test_update_dpp_double(double x, global double *p) {
*p = __builtin_amdgcn_update_dpp(x, x, 0x101, 0xf, 0xf, 0);
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_update_dpp))
*p = __builtin_amdgcn_update_dpp(x, x, 0x101, 0xf, 0xf, 0);
}
// CHECK-LABEL: @test_update_dpp_short
// CHECK: %0 = zext i16 %x to i32
// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %0, i32 %0, i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: %2 = trunc i32 %1 to i16
// CHECK-NEXT: store i16 %2,
// CHECK: %[[ZEXT:[0-9]+]] = zext i16 %x to i32
// CHECK-NEXT: %[[DPP_RET:[0-9]+]] = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %[[ZEXT]], i32 %[[ZEXT]], i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: %[[TRUNC:[0-9]+]] = trunc i32 %[[DPP_RET]] to i16
// CHECK-NEXT: store i16 %[[TRUNC]],
void test_update_dpp_short(short x, global short *p) {
*p = __builtin_amdgcn_update_dpp(x, x, 0x101, 0xf, 0xf, 0);
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_update_dpp))
*p = __builtin_amdgcn_update_dpp(x, x, 0x101, 0xf, 0xf, 0);
}
// CHECK-LABEL: @test_update_dpp_char
// CHECK: %0 = zext i8 %x to i32
// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %0, i32 %0, i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: %2 = trunc i32 %1 to i8
// CHECK-NEXT: store i8 %2,
// CHECK: %[[ZEXT:[0-9]+]] = zext i8 %x to i32
// CHECK-NEXT: %[[DPP_RET:[0-9]+]] = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %[[ZEXT]], i32 %[[ZEXT]], i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: %[[TRUNC:[0-9]+]] = trunc i32 %[[DPP_RET]] to i8
// CHECK-NEXT: store i8 %[[TRUNC]],
void test_update_dpp_char(char x, global char *p) {
*p = __builtin_amdgcn_update_dpp(x, x, 0x101, 0xf, 0xf, 0);
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_update_dpp))
*p = __builtin_amdgcn_update_dpp(x, x, 0x101, 0xf, 0xf, 0);
}
// CHECK-LABEL: @test_update_dpp_half
// CHECK: %0 = load i16,
// CHECK: %1 = zext i16 %0 to i32
// CHECK-NEXT: %2 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %1, i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: %3 = trunc i32 %2 to i16
// CHECK-NEXT: store i16 %3,
// CHECK: %[[LD:[0-9]+]] = load i16,
// CHECK: %[[ZEXT:[0-9]+]] = zext i16 %[[LD]] to i32
// CHECK-NEXT: %[[DPP_RET:[0-9]+]] = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %[[ZEXT]], i32 %[[ZEXT]], i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: %[[TRUNC:[0-9]+]] = trunc i32 %[[DPP_RET]] to i16
// CHECK-NEXT: store i16 %[[TRUNC]],
void test_update_dpp_half(half *x, global half *p) {
*p = __builtin_amdgcn_update_dpp(*x, *x, 0x101, 0xf, 0xf, 0);
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_update_dpp))
*p = __builtin_amdgcn_update_dpp(*x, *x, 0x101, 0xf, 0xf, 0);
}
// CHECK-LABEL: @test_update_dpp_int_uint
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %arg1, i32 %arg2, i32 0, i32 0, i32 0, i1 false)
void test_update_dpp_int_uint(global int* out, int arg1, unsigned int arg2)
{
*out = __builtin_amdgcn_update_dpp(arg1, arg2, 0, 0, 0, false);
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_update_dpp))
*out = __builtin_amdgcn_update_dpp(arg1, arg2, 0, 0, 0, false);
}
// CHECK-LABEL: @test_update_dpp_lit_int
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 5, i32 %arg1, i32 0, i32 0, i32 0, i1 false)
void test_update_dpp_lit_int(global int* out, int arg1)
{
*out = __builtin_amdgcn_update_dpp(5, arg1, 0, 0, 0, false);
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_update_dpp))
*out = __builtin_amdgcn_update_dpp(5, arg1, 0, 0, 0, false);
}
__constant int gi = 5;
@ -240,7 +269,8 @@ __constant int gi = 5;
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 5, i32 %arg1, i32 0, i32 0, i32 0, i1 false)
void test_update_dpp_const_int(global int* out, int arg1)
{
*out = __builtin_amdgcn_update_dpp(gi, arg1, 0, 0, 0, false);
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_update_dpp))
*out = __builtin_amdgcn_update_dpp(gi, arg1, 0, 0, 0, false);
}
// CHECK-LABEL: @test_ds_fadd
@ -397,14 +427,16 @@ void test_ds_fmaxf(__attribute__((address_space(3))) float *out, float src) {
// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.s.memtime()
void test_s_memtime(global ulong* out)
{
*out = __builtin_amdgcn_s_memtime();
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_memtime))
*out = __builtin_amdgcn_s_memtime();
}
// CHECK-LABEL: @test_perm
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.perm(i32 %a, i32 %b, i32 %s)
void test_perm(global uint* out, uint a, uint b, uint s)
{
*out = __builtin_amdgcn_perm(a, b, s);
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_perm))
*out = __builtin_amdgcn_perm(a, b, s);
}
// CHECK-LABEL: @test_groupstaticsize

View File

@ -1189,31 +1189,36 @@ kernel void test_ds_consume_lds(__attribute__((address_space(1))) int* out, __at
// CHECK-LABEL: @test_gws_init(
// CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.ds.gws.init(i32 %value, i32 %id)
kernel void test_gws_init(uint value, uint id) {
__builtin_amdgcn_ds_gws_init(value, id);
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_ds_gws_init))
__builtin_amdgcn_ds_gws_init(value, id);
}
// CHECK-LABEL: @test_gws_barrier(
// CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.ds.gws.barrier(i32 %value, i32 %id)
kernel void test_gws_barrier(uint value, uint id) {
__builtin_amdgcn_ds_gws_barrier(value, id);
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_ds_gws_barrier))
__builtin_amdgcn_ds_gws_barrier(value, id);
}
// CHECK-LABEL: @test_gws_sema_v(
// CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.ds.gws.sema.v(i32 %id)
kernel void test_gws_sema_v(uint id) {
__builtin_amdgcn_ds_gws_sema_v(id);
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_ds_gws_sema_v))
__builtin_amdgcn_ds_gws_sema_v(id);
}
// CHECK-LABEL: @test_gws_sema_br(
// CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.ds.gws.sema.br(i32 %value, i32 %id)
kernel void test_gws_sema_br(uint value, uint id) {
__builtin_amdgcn_ds_gws_sema_br(value, id);
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_ds_gws_sema_br))
__builtin_amdgcn_ds_gws_sema_br(value, id);
}
// CHECK-LABEL: @test_gws_sema_p(
// CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.ds.gws.sema.p(i32 %id)
kernel void test_gws_sema_p(uint id) {
__builtin_amdgcn_ds_gws_sema_p(id);
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_ds_gws_sema_p))
__builtin_amdgcn_ds_gws_sema_p(id);
}
// CHECK-LABEL: @test_mbcnt_lo(

View File

@ -0,0 +1,62 @@
// REQUIRES: amdgpu-registered-target
// REQUIRES: spirv-registered-target
// RUN: %clang_cc1 -fsyntax-only -verify -triple amdgcn -target-cpu gfx900 -Wno-unused-value %s
// RUN: %clang_cc1 -fsyntax-only -verify -triple amdgcn -target-cpu gfx1201 -Wno-unused-value %s
// RUN: %clang_cc1 -fsyntax-only -verify -triple spirv64-amd-amdhsa -Wno-unused-value %s
// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64 -aux-triple amdgcn -Wno-unused-value %s
// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64 -aux-triple spirv64-amd-amdhsa -Wno-unused-value %s
#define __device__ __attribute__((device))
#define __global__ __attribute__((global))
__device__ void f(int *ptr, int size, bool f) {
int i = 0;
if (f)
goto label; // expected-error {{cannot jump from this goto statement to its label}}
if (__builtin_amdgcn_processor_is("gfx900")) { // expected-note {{jump enters statement controlled by AMDGPU feature predicate}}
for (i = 0; i < size; ++i) {
label:
ptr[i] = i;
}
}
}
__device__ void g(int *ptr, int size, bool f) {
int i = 0;
if (f)
goto label; // expected-error {{cannot jump from this goto statement to its label}}
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_sleep_var)) { // expected-note {{jump enters statement controlled by AMDGPU feature predicate}}
for (i = 0; i < size; ++i) {
label:
ptr[i] = i;
}
}
}
__global__ void h(int *ptr, int size, bool f) {
int i = 0;
if (f)
goto label; // expected-error {{cannot jump from this goto statement to its label}}
if (__builtin_amdgcn_processor_is("gfx900")) { // expected-note {{jump enters statement controlled by AMDGPU feature predicate}}
for (i = 0; i < size; ++i) {
label:
ptr[i] = i;
}
}
}
__global__ void i(int *ptr, int size, bool f) {
int i = 0;
if (f)
goto label; // expected-error {{cannot jump from this goto statement to its label}}
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_sleep_var)) { // expected-note {{jump enters statement controlled by AMDGPU feature predicate}}
for (i = 0; i < size; ++i) {
label:
ptr[i] = i;
}
}
}

View File

@ -0,0 +1,31 @@
// REQUIRES: amdgpu-registered-target
// REQUIRES: spirv-registered-target
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx900 -ast-dump -ast-dump-decl-types %s | FileCheck %s --strict-whitespace
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1201 -ast-dump -ast-dump-decl-types %s | FileCheck %s --strict-whitespace
// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -ast-dump -ast-dump-decl-types %s | FileCheck %s --strict-whitespace
// RUN: %clang_cc1 -triple x86_64 -aux-triple amdgcn -ast-dump -ast-dump-decl-types %s | FileCheck %s --strict-whitespace
// RUN: %clang_cc1 -triple x86_64 -aux-triple spirv64-amd-amdhsa -ast-dump -ast-dump-decl-types %s | FileCheck %s --strict-whitespace
__attribute__((device)) auto foo() {
return __builtin_amdgcn_processor_is("gfx900");
}
__attribute__((device)) decltype(auto) bar() {
return __builtin_amdgcn_is_invocable(__builtin_amdgcn_s_sleep);
}
// CHECK: |-TypedefDecl {{.*}} implicit __amdgpu_feature_predicate_t '__amdgpu_feature_predicate_t'
// CHECK-NEXT: | `-BuiltinType {{.*}} '__amdgpu_feature_predicate_t'
// CHECK-DAG: |-FunctionDecl {{.*}} foo '__amdgpu_feature_predicate_t ()'
// CHECK-NEXT: |-CompoundStmt {{.*}}
// CHECK-NEXT: | `-ReturnStmt {{.*}}
// CHECK-NEXT: | `-CallExpr {{.*}} '__amdgpu_feature_predicate_t'
// CHECK-NEXT: | |-ImplicitCastExpr {{.*}} '__amdgpu_feature_predicate_t (*)(const char *) noexcept'
// CHECK-NEXT: | | `-DeclRefExpr {{.*}} Function {{.*}} '__builtin_amdgcn_processor_is' '__amdgpu_feature_predicate_t (const char *) noexcept'
// CHECK-NEXT: | `-StringLiteral {{.*}} "gfx900"
// CHECK-DAG: |-FunctionDecl {{.*}} bar '__amdgpu_feature_predicate_t ()'
// CHECK-NEXT: |-CompoundStmt {{.*}}
// CHECK-NEXT: | `-ReturnStmt {{.*}}
// CHECK-NEXT: | `-CallExpr {{.*}} '__amdgpu_feature_predicate_t'
// CHECK-NEXT: | |-ImplicitCastExpr {{.*}} '__amdgpu_feature_predicate_t (*)() noexcept' <BuiltinFnToFnPtr>
// CHECK-NEXT: | | `-DeclRefExpr {{.*}} Function {{.*}} '__builtin_amdgcn_is_invocable' '__amdgpu_feature_predicate_t () noexcept'

View File

@ -0,0 +1,40 @@
// REQUIRES: amdgpu-registered-target
// REQUIRED: spirv-registered-target
// RUN: %clang_cc1 -fsyntax-only -verify -triple amdgcn -Wno-unused-value %s
// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64 -aux-triple amdgcn -Wno-unused-value %s
// RUN: %clang_cc1 -fsyntax-only -verify -triple spirv64-amd-amdhsa -Wno-unused-value %s
// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64 -aux-triple spirv64-amd-amdhsa -Wno-unused-value %s
#define __device__ __attribute__((device))
#define __global__ __attribute__((global))
__device__ void foo(__amdgpu_feature_predicate_t l) {
decltype(__builtin_amdgcn_processor_is("gfx900")) what; // expected-error {{'what' has type __amdgpu_feature_predicate_t, which is not constructible}}
typeof(__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_sleep)) why; // expected-error {{'why' has type __amdgpu_feature_predicate_t, which is not constructible}}
bool b = true;
__amdgpu_feature_predicate_t v = false; // expected-error {{'v' has type __amdgpu_feature_predicate_t, which is not constructible}}
static_cast<__amdgpu_feature_predicate_t>(b); // expected-error {{static_cast from 'bool' to '__amdgpu_feature_predicate_t' is not allowed}}
dynamic_cast<__amdgpu_feature_predicate_t>(b); // expected-error {{invalid target type '__amdgpu_feature_predicate_t' for dynamic_cast; target type must be a reference or pointer type to a defined class}}
reinterpret_cast<__amdgpu_feature_predicate_t>(b); // expected-error {{reinterpret_cast from 'bool' to '__amdgpu_feature_predicate_t' is not allowed}}
__amdgpu_feature_predicate_t k; // expected-error {{'k' has type __amdgpu_feature_predicate_t, which is not constructible}}
int *ip = (int *)l; // expected-error {{cannot cast from type '__amdgpu_feature_predicate_t' to pointer type 'int *'}}
void *vp = (void *)l; // expected-error {{cannot cast from type '__amdgpu_feature_predicate_t' to pointer type 'void *'}}
}
__global__ void bar(__amdgpu_feature_predicate_t l) {
decltype(__builtin_amdgcn_processor_is("gfx900")) what; // expected-error {{'what' has type __amdgpu_feature_predicate_t, which is not constructible}}
typeof(__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_sleep)) why; // expected-error {{'why' has type __amdgpu_feature_predicate_t, which is not constructible}}
bool b = true;
__amdgpu_feature_predicate_t v = false; // expected-error {{'v' has type __amdgpu_feature_predicate_t, which is not constructible}}
static_cast<__amdgpu_feature_predicate_t>(b); // expected-error {{static_cast from 'bool' to '__amdgpu_feature_predicate_t' is not allowed}}
dynamic_cast<__amdgpu_feature_predicate_t>(b); // expected-error {{invalid target type '__amdgpu_feature_predicate_t' for dynamic_cast; target type must be a reference or pointer type to a defined class}}
reinterpret_cast<__amdgpu_feature_predicate_t>(b); // expected-error {{reinterpret_cast from 'bool' to '__amdgpu_feature_predicate_t' is not allowed}}
__amdgpu_feature_predicate_t k; // expected-error {{'k' has type __amdgpu_feature_predicate_t, which is not constructible}}
int *ip = (int *)l; // expected-error {{cannot cast from type '__amdgpu_feature_predicate_t' to pointer type 'int *'}}
void *vp = (void *)l; // expected-error {{cannot cast from type '__amdgpu_feature_predicate_t' to pointer type 'void *'}}
}
static_assert(sizeof(__amdgpu_feature_predicate_t) == 0); // expected-error {{invalid application of 'sizeof' to sizeless type '__amdgpu_feature_predicate_t'}}
static_assert(alignof(__amdgpu_feature_predicate_t) == 0); // expected-error {{invalid application of 'alignof' to sizeless type '__amdgpu_feature_predicate_t'}};

View File

@ -0,0 +1,115 @@
// REQUIRES: amdgpu-registered-target
// REQUIRES: spirv-registered-target
// RUN: %clang_cc1 -fsyntax-only -verify -triple spirv64-amd-amdhsa -Wamdgpu-unguarded-builtin-usage %s
#define __device__ __attribute__((device))
#define __global__ __attribute__((global))
__device__ void g();
__device__ void f(int x, bool b) {
long v15_16;
__asm volatile("v_lshlrev_b64 v[15:16], 0, %0" : "={v[15:16]}"(v15_16) : "v"(x)); // expected-warning {{the 'v_lshlrev_b64 v[15:16], 0, $0' ASM sequence might be invalid for some AMDGPU targets}}
// expected-note@-1 {{enclose the 'v_lshlrev_b64 v[15:16], 0, $0' ASM sequence in a scope controlled by a __builtin_amdgcn_is_processor check to silence this warning}}
if (__builtin_amdgcn_processor_is("gfx90a")) {
long v15_16;
__asm volatile("v_lshlrev_b64 v[15:16], 0, %0" : "={v[15:16]}"(v15_16) : "v"(x));
}
if (!__builtin_amdgcn_processor_is("gfx90a")) {
long v15_16;
__asm volatile("v_lshlrev_b64 v[15:16], 0, %0" : "={v[15:16]}"(v15_16) : "v"(x)); // expected-warning {{the 'v_lshlrev_b64 v[15:16], 0, $0' ASM sequence might be invalid for some AMDGPU targets}}
// expected-note@-1 {{enclose the 'v_lshlrev_b64 v[15:16], 0, $0' ASM sequence in a scope controlled by a __builtin_amdgcn_is_processor check to silence this warning}}
}
__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_sleep_var) ? __builtin_amdgcn_s_sleep_var(x) : __builtin_trap();
!__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_sleep_var) ? __builtin_amdgcn_s_sleep_var(x) : __builtin_trap(); // expected-error {{'__builtin_amdgcn_s_sleep_var' cannot be invoked in the current context, as it requires the 'gfx12-insts' feature(s)}}
const auto lambda = [=] __device__ () {
__builtin_amdgcn_s_sleep(42); // expected-warning {{'__builtin_amdgcn_s_sleep' might be unavailable on some AMDGPU targets}}
// expected-note@-1 {{enclose '__builtin_amdgcn_s_sleep' in a __builtin_amdgcn_is_invocable check to silence this warning}}
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_sleep_var))
__builtin_amdgcn_s_sleep_var(x);
};
const auto generic_lambda = [] __device__ (auto&& y) {
__builtin_amdgcn_s_sleep(42); // expected-warning {{'__builtin_amdgcn_s_sleep' might be unavailable on some AMDGPU targets}}
// expected-note@-1 {{enclose '__builtin_amdgcn_s_sleep' in a __builtin_amdgcn_is_invocable check to silence this warning}}
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_sleep_var)) {
__builtin_amdgcn_s_sleep_var(y);
// Has the same requirements - gfx12-insts, thus correct, but we should still warn.
__builtin_amdgcn_s_barrier_signal_var(nullptr, y); // expected-warning {{'__builtin_amdgcn_s_barrier_signal_var' might be unavailable on some AMDGPU targets}}
// expected-note@-1 {{enclose '__builtin_amdgcn_s_barrier_signal_var' in a __builtin_amdgcn_is_invocable check to silence this warning}}
}
};
__builtin_amdgcn_s_sleep(42); // expected-warning {{'__builtin_amdgcn_s_sleep' might be unavailable on some AMDGPU targets}}
// expected-note@-1 {{enclose '__builtin_amdgcn_s_sleep' in a __builtin_amdgcn_is_invocable check to silence this warning}}
if (__builtin_amdgcn_processor_is("gfx1201")) {
if (__builtin_amdgcn_processor_is("gfx906")) // expected-error {{conflicting check for AMDGCN processor '__builtin_amdgcn_processor_is("gfx906")' found in a scope already controlled by a check for AMDGCN processor}}
// expected-note@-2 {{predicate guard, with establishes the context, inserted here}}
__builtin_trap();
}
if (__builtin_amdgcn_processor_is("gfx900")) {
if (__builtin_amdgcn_processor_is("gfx900")) // This is fine, albeit potentially spurious.
++x;
}
if (__builtin_amdgcn_processor_is("gfx1030"))
__builtin_amdgcn_s_barrier_signal_isfirst(42); // expected-error {{'__builtin_amdgcn_s_barrier_signal_isfirst' cannot be invoked in the current context, as it requires the 'gfx12-insts' feature(s), which 'gfx1030' does not provide}}
// expected-note@-2 {{predicate guard, with establishes the context, inserted here}}
// Direct guard
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_sleep))
__builtin_amdgcn_s_sleep(42);
// Guarded scope
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_sleep_var)) {
if (b) {
g();
while (--x > 42)
__builtin_amdgcn_s_sleep_var(x);
}
}
}
__attribute__((target("arch=gfx1030")))
__device__ void h(int x) {
if (__builtin_amdgcn_processor_is("gfx1030")) // Fine, same processor
return;
long v15_16;
__asm volatile("v_lshlrev_b64 v[15:16], 0, %0" : "={v[15:16]}"(v15_16) : "v"(x)); // "Fine", explicit gfx target
__builtin_amdgcn_s_ttracedata_imm(42); // expected-warning {{'__builtin_amdgcn_s_ttracedata_imm' might be unavailable on some AMDGPU targets}}
// expected-note@-1 {{enclose '__builtin_amdgcn_s_ttracedata_imm' in a __builtin_amdgcn_is_invocable check to silence this warning}}
__builtin_amdgcn_s_barrier_signal_isfirst(42); // expected-error {{'__builtin_amdgcn_s_barrier_signal_isfirst' cannot be invoked in the current context, as it requires the 'gfx12-insts' feature(s), which 'gfx1030' does not provide}}
// expected-note@-12 {{predicate guard, with establishes the context, inserted here}}
if (__builtin_amdgcn_processor_is("gfx906")) // expected-error {{conflicting check for AMDGCN processor '__builtin_amdgcn_processor_is("gfx906")' found in a scope already controlled by a check for AMDGCN processor}}
// expected-note@-15 {{predicate guard, with establishes the context, inserted here}}
__builtin_trap();
}
__attribute__((target("gfx11-insts")))
__device__ void i(int x) {
__builtin_amdgcn_s_wait_event_export_ready(); // expected-warning {{'__builtin_amdgcn_s_wait_event_export_ready' might be unavailable on some AMDGPU targets}}
// expected-note@-1 {{enclose '__builtin_amdgcn_s_wait_event_export_ready' in a __builtin_amdgcn_is_invocable check to silence this warning}}
__builtin_amdgcn_s_barrier_signal_isfirst(42); // expected-error {{'__builtin_amdgcn_s_barrier_signal_isfirst' cannot be invoked in the current context, as it requires the 'gfx12-insts' feature(s)}}
}
__attribute__((target("gfx11-insts,gfx12-insts")))
__device__ void j(int x) {
__builtin_amdgcn_s_wait_event_export_ready(); // expected-warning {{'__builtin_amdgcn_s_wait_event_export_ready' might be unavailable on some AMDGPU targets}}
// expected-note@-1 {{enclose '__builtin_amdgcn_s_wait_event_export_ready' in a __builtin_amdgcn_is_invocable check to silence this warning}}
__builtin_amdgcn_s_barrier_signal_isfirst(42); // expected-warning {{'__builtin_amdgcn_s_barrier_signal_isfirst' might be unavailable on some AMDGPU targets}}
// expected-note@-1 {{enclose '__builtin_amdgcn_s_barrier_signal_isfirst' in a __builtin_amdgcn_is_invocable check to silence this warning}}
}

View File

@ -0,0 +1,21 @@
// REQUIRES: amdgpu-registered-target
// REQUIRES: spirv-registered-target
// RUN: %clang_cc1 -fsyntax-only -verify -triple amdgcn -Wno-unused-value %s
// RUN: %clang_cc1 -fsyntax-only -verify -triple spirv64-amd-amdhsa -Wno-unused-value %s
// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64 -aux-triple amdgcn -Wno-unused-value %s
// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64 -aux-triple spirv64-amd-amdhsa -Wno-unused-value %s
// expected-no-diagnostics
#define __device__ __attribute__((device))
#define __global__ __attribute__((global))
__device__ void foo() {
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_permlanex16))
return __builtin_trap();
}
__global__ void bar() {
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_permlanex16))
return __builtin_trap();
}

View File

@ -0,0 +1,21 @@
// REQUIRES: amdgpu-registered-target
// REQUIRES: spirv-registered-target
// RUN: %clang_cc1 -fsyntax-only -verify -triple amdgcn -Wno-unused-value %s
// RUN: %clang_cc1 -fsyntax-only -verify -triple spirv64-amd-amdhsa -Wno-unused-value %s
// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64 -aux-triple amdgcn -Wno-unused-value %s
// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64 -aux-triple spirv64-amd-amdhsa -Wno-unused-value %s
// expected-no-diagnostics
#define __device__ __attribute__((device))
#define __global__ __attribute__((global))
__device__ void foo() {
if (__builtin_amdgcn_processor_is("gfx900"))
return __builtin_trap();
}
__global__ void bar() {
if (__builtin_amdgcn_processor_is("gfx900"))
return __builtin_trap();
}

View File

@ -345,6 +345,7 @@ private:
.Case("__fp16", "h")
.Case("__hlsl_resource_t", "Qr")
.Case("__amdgpu_buffer_rsrc_t", "Qb")
.Case("__amdgpu_feature_predicate_t", "Qc")
.Case("__amdgpu_texture_t", "Qt")
.Case("__int128_t", "LLLi")
.Case("_Float16", "x")

View File

@ -2403,11 +2403,6 @@ static bool generateSpecConstantInst(const SPIRV::IncomingCall *Call,
switch (Opcode) {
case SPIRV::OpSpecConstant: {
// Build the SpecID decoration.
unsigned SpecId =
static_cast<unsigned>(getIConstVal(Call->Arguments[0], MRI));
buildOpDecorate(Call->ReturnRegister, MIRBuilder, SPIRV::Decoration::SpecId,
{SpecId});
// Determine the constant MI.
Register ConstRegister = Call->Arguments[1];
const MachineInstr *Const = getDefInstrMaybeConstant(ConstRegister, MRI);
@ -2433,6 +2428,11 @@ static bool generateSpecConstantInst(const SPIRV::IncomingCall *Call,
else
addNumImm(ConstOperand.getFPImm()->getValueAPF().bitcastToAPInt(), MIB);
}
// Build the SpecID decoration.
unsigned SpecId =
static_cast<unsigned>(getIConstVal(Call->Arguments[0], MRI));
buildOpDecorate(Call->ReturnRegister, MIRBuilder, SPIRV::Decoration::SpecId,
{SpecId});
return true;
}
case SPIRV::OpSpecConstantComposite: {

View File

@ -4239,7 +4239,7 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
.addUse(GR.getSPIRVTypeID(ResType));
MIB.constrainAllUses(TII, TRI, RBI);
unsigned SpecId = I.getOperand(2).getImm();
buildOpDecorate(I.getOperand(0).getReg(), *MIB.getInstr(), TII,
buildOpDecorate(I.getOperand(0).getReg(), *++MIB->getIterator(), TII,
SPIRV::Decoration::SpecId, {SpecId});
return true;

View File

@ -439,6 +439,9 @@ bool SPIRVPrepareFunctions::substituteIntrinsicCalls(Function *F) {
if (!CF || !CF->isIntrinsic())
continue;
auto *II = cast<IntrinsicInst>(Call);
if (Intrinsic::isTargetIntrinsic(II->getIntrinsicID()) &&
II->getCalledOperand()->getName().starts_with("llvm.spv"))
continue;
switch (II->getIntrinsicID()) {
case Intrinsic::memset:
case Intrinsic::bswap:

View File

@ -6,8 +6,13 @@
//
//===----------------------------------------------------------------------===//
//
// The pass transforms IR globals that cannot be trivially mapped to SPIRV
// into something that is trival to lower.
// The pass:
// - transforms IR globals that cannot be trivially mapped to SPIRV into
// something that is trival to lower;
// - for AMDGCN flavoured SPIRV, it assigns unique IDs to the specialisation
// constants associated with feature predicates, which were inserted by the
// FE when expanding calls to __builtin_amdgcn_processor_is or
// __builtin_amdgcn_is_invocable
//
//===----------------------------------------------------------------------===//
@ -15,9 +20,15 @@
#include "SPIRVUtils.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/IR/IntrinsicsSPIRV.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include <climits>
#include <string>
#define DEBUG_TYPE "spirv-prepare-globals"
using namespace llvm;
@ -71,6 +82,51 @@ bool tryReplaceAliasWithAliasee(GlobalAlias &GA) {
return true;
}
bool tryAssignPredicateSpecConstIDs(Module &M, Function *F) {
StringMap<unsigned> IDs;
for (auto &&U : F->users()) {
auto *CI = dyn_cast<CallInst>(U);
if (!CI)
continue;
auto *SpecID = dyn_cast<ConstantInt>(CI->getArgOperand(0));
if (!SpecID)
continue;
unsigned ID = SpecID->getZExtValue();
if (ID != UINT32_MAX)
continue;
// Replace placeholder Specialisation Constant IDs with unique IDs
// associated with the predicate being evaluated, which is encoded via
// spv_assign_name.
auto *MD =
cast<MDNode>(cast<MetadataAsValue>(CI->getOperand(2))->getMetadata());
auto *P = cast<MDString>(MD->getOperand(0));
ID = IDs.try_emplace(P->getString(), IDs.size()).first->second;
CI->setArgOperand(0, ConstantInt::get(CI->getArgOperand(0)->getType(), ID));
}
if (IDs.empty())
return false;
// Store the predicate -> ID mapping as a fixed format string
// (predicate ID\0...), for later use during SPIR-V consumption.
std::string Tmp;
for (auto &&[Predicate, SpecID] : IDs)
Tmp.append(Predicate).append(" ").append(utostr(SpecID)).push_back('\0');
Constant *PredSpecIDStr =
ConstantDataArray::getString(M.getContext(), Tmp, false);
new GlobalVariable(M, PredSpecIDStr->getType(), true,
GlobalVariable::LinkageTypes::ExternalLinkage,
PredSpecIDStr, "llvm.amdgcn.feature.predicate.ids");
return true;
}
bool SPIRVPrepareGlobals::runOnModule(Module &M) {
bool Changed = false;
@ -78,6 +134,16 @@ bool SPIRVPrepareGlobals::runOnModule(Module &M) {
Changed |= tryReplaceAliasWithAliasee(GA);
}
if (M.getTargetTriple().getVendor() != Triple::AMD)
return Changed;
// TODO: Currently, for AMDGCN flavoured SPIR-V, the symbol can only be
// inserted via feature predicate use, but in the future this will need
// revisiting if we start making more liberal use of the intrinsic.
if (Function *F = Intrinsic::getDeclarationIfExists(
&M, Intrinsic::spv_named_boolean_spec_constant))
Changed |= tryAssignPredicateSpecConstIDs(M, F);
return Changed;
}
char SPIRVPrepareGlobals::ID = 0;

View File

@ -0,0 +1,281 @@
; RUN: llc -O0 --verify-machineinstrs -mtriple=spirv64-amd-amdhsa %s -o - | FileCheck %s
; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-amd-amdhsa %s -o - -filetype=obj | spirv-val %}
; CHECK: OpName %[[#KERNEL:]] "kernel"
; CHECK: OpName %[[#FEATURE_PREDICATE_IDS:]] "llvm.amdgcn.feature.predicate.ids"
; CHECK: OpName %[[#SET_FPENV_I64:]] "spirv.llvm_set_fpenv_i64"
; CHECK: OpName %[[#ASHR_PK_I8_I32:]] "spirv.llvm_amdgcn_ashr_pk_i8_i32"
; CHECK: OpName %[[#S_SLEEP_VAR:]] "spirv.llvm_amdgcn_s_sleep_var"
; CHECK: OpName %[[#S_WAIT_EVENT_EXPORT_READY:]] "spirv.llvm_amdgcn_s_wait_event_export_ready"
; CHECK: OpName %[[#S_TTRACEDATA_IMM:]] "spirv.llvm_amdgcn_s_ttracedata_imm"
; CHECK: OpDecorate %[[#IS_GFX950:]] SpecId 8
; CHECK: OpDecorate %[[#IS_GFX1201:]] SpecId 3
; CHECK: OpDecorate %[[#HAS_GFX12_INSTS:]] SpecId 7
; CHECK: OpDecorate %[[#IS_GFX906:]] SpecId 6
; CHECK: OpDecorate %[[#IS_GFX1010:]] SpecId 4
; CHECK: OpDecorate %[[#IS_GFX1101:]] SpecId 5
; CHECK: OpDecorate %[[#IS_GFX1101_1:]] SpecId 5
; CHECK: OpDecorate %[[#IS_GFX1010_1:]] SpecId 4
; CHECK: OpDecorate %[[#IS_GFX1201_1:]] SpecId 3
; CHECK: OpDecorate %[[#HAS_GFX11_INSTS:]] SpecId 0
; CHECK: OpDecorate %[[#HAS_GFX10_INSTS:]] SpecId 2
; CHECK: OpDecorate %[[#HAS_GFX1250_INSTS:]] SpecId 1
; CHECK: OpDecorate %[[#HAS_GFX11_INSTS_1:]] SpecId 0
; CHECK: %[[#BOOL:]] = OpTypeBool
; CHECK: %[[#UCHAR:]] = OpTypeInt 8
; CHECK: %[[#FEATURE_PREDICATE_IDS_MAP_STRLEN:]] = OpConstant %[[#]] 137
; CHECK: %[[#FEATURE_PREDICATE_IDS_MAP_STRTY:]] = OpTypeArray %[[#UCHAR]] %[[#FEATURE_PREDICATE_IDS_MAP_STRLEN]]
; CHECK: %[[#FEATURE_PREDICATE_IDS_MAP_STRVAL:]] = OpConstantComposite %[[#FEATURE_PREDICATE_IDS_MAP_STRTY]]
; CHECK: %[[#FEATURE_PREDICATE_IDS]] = OpVariable %[[#]] CrossWorkgroup %[[#FEATURE_PREDICATE_IDS_MAP_STRVAL]]
; CHECK: %[[#IS_GFX950]] = OpSpecConstantFalse %[[#BOOL]]
; CHECK: %[[#IS_GFX1201]] = OpSpecConstantFalse %[[#BOOL]]
; CHECK: %[[#HAS_GFX12_INSTS]] = OpSpecConstantFalse %[[#BOOL]]
; CHECK: %[[#IS_GFX906]] = OpSpecConstantFalse %[[#BOOL]]
; CHECK: %[[#IS_GFX1010]] = OpSpecConstantFalse %[[#BOOL]]
; CHECK: %[[#IS_GFX1101]] = OpSpecConstantFalse %[[#BOOL]]
; CHECK: %[[#IS_GFX1101_1]] = OpSpecConstantFalse %[[#BOOL]]
; CHECK: %[[#IS_GFX1010_1]] = OpSpecConstantFalse %[[#BOOL]]
; CHECK: %[[#IS_GFX1201_1]] = OpSpecConstantFalse %[[#BOOL]]
; CHECK: %[[#HAS_GFX11_INSTS]] = OpSpecConstantFalse %[[#BOOL]]
; CHECK: %[[#HAS_GFX10_INSTS]] = OpSpecConstantFalse %[[#BOOL]]
; CHECK: %[[#HAS_GFX1250_INSTS]] = OpSpecConstantFalse %[[#BOOL]]
; CHECK: %[[#HAS_GFX11_INSTS_1]] = OpSpecConstantFalse %[[#BOOL]]
declare void @llvm.amdgcn.s.monitor.sleep(i16 immarg) addrspace(4)
declare void @llvm.amdgcn.s.sleep(i32 immarg) addrspace(4)
declare i1 @llvm.spv.named.boolean.spec.constant(i32, i1, metadata) addrspace(4)
declare i16 @llvm.amdgcn.ashr.pk.i8.i32(i32, i32, i32) addrspace(4) #3
declare void @llvm.set.fpenv.i64(i64) addrspace(4) #4
declare void @llvm.amdgcn.s.sleep.var(i32) addrspace(4) #5
declare void @llvm.amdgcn.s.wait.event.export.ready() addrspace(4) #5
declare void @llvm.amdgcn.s.ttracedata.imm(i16 immarg) addrspace(4) #6
@p = external addrspace(1) global i32
@g = external addrspace(1) constant i32
define void @kernel() addrspace(4) {
; CHECK-DAG: %[[#KERNEL]] = OpFunction %39 None %40 ; -- Begin function kernel
; CHECK-NEXT: %2 = OpLabel
; CHECK-NEXT: %112 = OpLoad %44 %85 Aligned 4
; CHECK-NEXT: OpBranchConditional %[[#IS_GFX950]] %4 %3
; CHECK-NEXT: %3 = OpLabel
; CHECK-NEXT: %113 = OpFunctionCall %39 %[[#SET_FPENV_I64]] %57
; CHECK-NEXT: OpBranch %5
; CHECK-NEXT: %4 = OpLabel
; CHECK-NEXT: %114 = OpFunctionCall %43 %[[#ASHR_PK_I8_I32]] %56 %56 %56
; CHECK-NEXT: OpBranch %5
; CHECK-NEXT: %5 = OpLabel
; CHECK-NEXT: OpBranchConditional %[[#IS_GFX1201]] %7 %6
; CHECK-NEXT: %6 = OpLabel
; CHECK-NEXT: OpBranchConditional %[[#HAS_GFX12_INSTS]] %7 %8
; CHECK-NEXT: %7 = OpLabel
; CHECK-NEXT: %115 = OpFunctionCall %39 %[[#S_SLEEP_VAR]] %112
; CHECK-NEXT: OpBranch %8
; CHECK-NEXT: %8 = OpLabel
; CHECK-NEXT: OpBranchConditional %[[#IS_GFX906]] %10 %9
; CHECK-NEXT: %9 = OpLabel
; CHECK-NEXT: %116 = OpFunctionCall %39 %[[#S_WAIT_EVENT_EXPORT_READY]]
; CHECK-NEXT: OpBranch %14
; CHECK-NEXT: %10 = OpLabel
; CHECK-NEXT: OpBranchConditional %[[#IS_GFX1010]] %12 %11
; CHECK-NEXT: %11 = OpLabel
; CHECK-NEXT: OpBranchConditional %[[#IS_GFX1101]] %12 %13
; CHECK-NEXT: %12 = OpLabel
; CHECK-NEXT: %117 = OpFunctionCall %39 %[[#S_TTRACEDATA_IMM]] %55
; CHECK-NEXT: OpBranch %13
; CHECK-NEXT: %13 = OpLabel
; CHECK-NEXT: OpBranch %14
; CHECK-NEXT: %14 = OpLabel
; CHECK-NEXT: OpBranch %15
; CHECK-NEXT: %15 = OpLabel
; CHECK-NEXT: OpBranchConditional %[[#IS_GFX1101_1]] %16 %17
; CHECK-NEXT: %16 = OpLabel
; CHECK-NEXT: %118 = OpLoad %44 %97 Aligned 4
; CHECK-NEXT: %119 = OpIAdd %44 %118 %112
; CHECK-NEXT: OpStore %97 %119 Aligned 4
; CHECK-NEXT: OpBranch %17
; CHECK-NEXT: %17 = OpLabel
; CHECK-NEXT: OpBranch %18
; CHECK-NEXT: %18 = OpLabel
; CHECK-NEXT: %120 = OpLoad %44 %97 Aligned 4
; CHECK-NEXT: %121 = OpISub %44 %120 %112
; CHECK-NEXT: OpStore %97 %121 Aligned 4
; CHECK-NEXT: OpBranch %19
; CHECK-NEXT: %19 = OpLabel
; CHECK-NEXT: OpBranchConditional %[[#IS_GFX1010_1]] %20 %21
; CHECK-NEXT: %20 = OpLabel
; CHECK-NEXT: OpBranch %21
; CHECK-NEXT: %21 = OpLabel
; CHECK-NEXT: %122 = OpPhi %48 %54 %19 %54 %20
; CHECK-NEXT: OpBranchConditional %122 %18 %22
; CHECK-NEXT: %22 = OpLabel
; CHECK-NEXT: OpBranch %23
; CHECK-NEXT: %23 = OpLabel
; CHECK-NEXT: OpBranchConditional %[[#IS_GFX1201_1]] %24 %25
; CHECK-NEXT: %24 = OpLabel
; CHECK-NEXT: OpBranch %25
; CHECK-NEXT: %25 = OpLabel
; CHECK-NEXT: OpBranchConditional %[[#HAS_GFX11_INSTS]] %29 %26
; CHECK-NEXT: %26 = OpLabel
; CHECK-NEXT: OpBranchConditional %[[#HAS_GFX10_INSTS]] %27 %28
; CHECK-NEXT: %27 = OpLabel
; CHECK-NEXT: %123 = OpFunctionCall %39 %[[#S_TTRACEDATA_IMM]] %55
; CHECK-NEXT: OpBranch %28
; CHECK-NEXT: %28 = OpLabel
; CHECK-NEXT: OpBranch %30
; CHECK-NEXT: %29 = OpLabel
; CHECK-NEXT: %124 = OpFunctionCall %39 %[[#S_WAIT_EVENT_EXPORT_READY]]
; CHECK-NEXT: OpBranch %30
; CHECK-NEXT: %30 = OpLabel
; CHECK-NEXT: OpBranch %31
; CHECK-NEXT: %31 = OpLabel
; CHECK-NEXT: %125 = OpLoad %44 %97 Aligned 4
; CHECK-NEXT: %126 = OpISub %44 %125 %112
; CHECK-NEXT: OpStore %97 %126 Aligned 4
; CHECK-NEXT: OpBranch %32
; CHECK-NEXT: %32 = OpLabel
; CHECK-NEXT: OpBranchConditional %[[#HAS_GFX1250_INSTS]] %33 %34
; CHECK-NEXT: %33 = OpLabel
; CHECK-NEXT: OpBranch %34
; CHECK-NEXT: %34 = OpLabel
; CHECK-NEXT: %127 = OpPhi %48 %54 %32 %54 %33
; CHECK-NEXT: OpBranchConditional %127 %31 %35
; CHECK-NEXT: %35 = OpLabel
; CHECK-NEXT: OpBranch %36
; CHECK-NEXT: %36 = OpLabel
; CHECK-NEXT: OpBranchConditional %[[#HAS_GFX11_INSTS_1]] %37 %38
entry:
%x = load i32, ptr addrspace(1) @g
%is.gfx950. = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata !9)
br i1 %is.gfx950., label %cond.true, label %cond.false
cond.true:
%0 = call addrspace(4) i16 @llvm.amdgcn.ashr.pk.i8.i32(i32 8, i32 8, i32 8)
br label %cond.end
cond.false:
call addrspace(4) void @llvm.set.fpenv.i64(i64 -1)
br label %cond.end
cond.end:
%is.gfx1201. = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata !10)
br i1 %is.gfx1201., label %if.then, label %lor.lhs.false
lor.lhs.false:
%has.gfx12-insts. = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata !11)
br i1 %has.gfx12-insts., label %if.then, label %if.end
if.then:
call addrspace(4) void @llvm.amdgcn.s.sleep.var(i32 %x)
br label %if.end
if.end:
%is.gfx906. = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata !12)
br i1 %is.gfx906., label %if.else, label %if.then2
if.then2:
call addrspace(4) void @llvm.amdgcn.s.wait.event.export.ready()
br label %if.end6
if.else:
%is.gfx1010. = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata !13)
br i1 %is.gfx1010., label %if.then4, label %lor.lhs.false3
lor.lhs.false3:
%is.gfx1101. = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata !14)
br i1 %is.gfx1101., label %if.then4, label %if.end5
if.then4:
call addrspace(4) void @llvm.amdgcn.s.ttracedata.imm(i16 1)
br label %if.end5
if.end5:
br label %if.end6
if.end6:
br label %while.cond
while.cond:
%is.gfx1101.7 = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata !14)
br i1 %is.gfx1101.7, label %while.body, label %while.end
while.body:
%4 = load i32, ptr addrspace(1) @p
%add = add i32 %4, %x
store i32 %add, ptr addrspace(1) @p
br label %while.end
while.end:
br label %do.body
do.body:
%7 = load i32, ptr addrspace(1) @p
%sub = sub i32 %7, %x
store i32 %sub, ptr addrspace(1) @p
br label %do.cond
do.cond:
%is.gfx1010.8 = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata !13)
br i1 %is.gfx1010.8, label %land.rhs, label %land.end
land.rhs:
br label %land.end
land.end:
%c = phi i1 [ false, %do.cond ], [ false, %land.rhs ]
br i1 %c, label %do.body, label %do.end
do.end:
br label %for.cond
for.cond:
%is.gfx1201.9 = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata !10)
br i1 %is.gfx1201.9, label %for.body, label %for.end
for.body:
br label %for.end
for.inc:
%9 = load i32, ptr addrspace(1) @p
%inc = add i32 %9, 1
store i32 %inc, ptr addrspace(1) @p
br label %for.cond
for.end:
%has.gfx11-insts. = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata !18)
br i1 %has.gfx11-insts., label %if.then10, label %if.else11
if.then10:
call addrspace(4) void @llvm.amdgcn.s.wait.event.export.ready()
br label %if.end14
if.else11:
%has.gfx10-insts. = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata !19)
br i1 %has.gfx10-insts., label %if.then12, label %if.end13
if.then12:
call addrspace(4) void @llvm.amdgcn.s.ttracedata.imm(i16 1)
br label %if.end13
if.end13:
br label %if.end14
if.end14:
br label %do.body15
do.body15:
%12 = load i32, ptr addrspace(1) @p
%sub16 = sub i32 %12, %x
store i32 %sub16, ptr addrspace(1) @p
br label %do.cond17
do.cond17:
%has.gfx1250-insts. = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata !20)
br i1 %has.gfx1250-insts., label %land.rhs9, label %land.end10
land.rhs9:
br label %land.end10
land.end10:
%c1 = phi i1 [ false, %do.cond17 ], [ false, %land.rhs9 ]
br i1 %c1, label %do.body15, label %do.end18
do.end18:
br label %for.cond19
for.cond19:
%has.gfx11-insts.20 = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata !18)
br i1 %has.gfx11-insts.20, label %for.body21, label %for.end24
for.body21:
br label %for.end24
for.inc22:
%14 = load i32, ptr addrspace(1) @p
%inc23 = add i32 %14, 1
store i32 %inc23, ptr addrspace(1) @p
br label %for.cond19
for.end24:
ret void
}
!9 = !{!"is.gfx950"}
!10 = !{!"is.gfx1201"}
!11 = !{!"has.gfx12-insts"}
!12 = !{!"is.gfx906"}
!13 = !{!"is.gfx1010"}
!14 = !{!"is.gfx1101"}
!18 = !{!"has.gfx11-insts"}
!19 = !{!"has.gfx10-insts"}
!20 = !{!"has.gfx1250-insts"}

View File

@ -0,0 +1,153 @@
; RUN: llc -O0 -mtriple=spirv64-amd-amdhsa %s -print-after-all -o - 2>&1 | FileCheck %s
; RUN: llc -O3 -mtriple=spirv64-amd-amdhsa %s -print-after-all -o - 2>&1 | FileCheck %s
; CHECK: *** IR Dump After SPIRV prepare global variables (prepare-globals) ***
declare void @llvm.amdgcn.s.monitor.sleep(i16 immarg) addrspace(4)
declare void @llvm.amdgcn.s.sleep(i32 immarg) addrspace(4)
declare i1 @_Z20__spirv_SpecConstantib(i32, i1) addrspace(4)
declare i16 @llvm.amdgcn.ashr.pk.i8.i32(i32, i32, i32) addrspace(4) #3
declare void @llvm.set.fpenv.i64(i64) addrspace(4) #4
declare void @llvm.amdgcn.s.sleep.var(i32) addrspace(4) #5
declare void @llvm.amdgcn.s.wait.event.export.ready() addrspace(4) #5
declare void @llvm.amdgcn.s.ttracedata.imm(i16 immarg) addrspace(4) #6
@p = external addrspace(1) global i32
@g = external addrspace(1) constant i32
; CHECK: @llvm.amdgcn.feature.predicate.ids = addrspace(1) constant [137 x i8] c"is.gfx1010 4\00is.gfx950 0\00is.gfx1101 5\00has.gfx11-insts 6\00is.gfx906 3\00is.gfx1201 1\00has.gfx1250-insts 8\00has.gfx10-insts 7\00has.gfx12-insts 2\00"
define void @kernel() addrspace(4) {
entry:
%x = load i32, ptr addrspace(1) @g
%is.gfx950. = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata !9)
br i1 %is.gfx950., label %cond.true, label %cond.false
cond.true:
%0 = call addrspace(4) i16 @llvm.amdgcn.ashr.pk.i8.i32(i32 8, i32 8, i32 8)
br label %cond.end
cond.false:
call addrspace(4) void @llvm.set.fpenv.i64(i64 -1)
br label %cond.end
cond.end:
%is.gfx1201. = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata !10)
br i1 %is.gfx1201., label %if.then, label %lor.lhs.false
lor.lhs.false:
%has.gfx12-insts. = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata !11)
br i1 %has.gfx12-insts., label %if.then, label %if.end
if.then:
call addrspace(4) void @llvm.amdgcn.s.sleep.var(i32 %x)
br label %if.end
if.end:
%is.gfx906. = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata !12)
br i1 %is.gfx906., label %if.else, label %if.then2
if.then2:
call addrspace(4) void @llvm.amdgcn.s.wait.event.export.ready()
br label %if.end6
if.else:
%is.gfx1010. = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata !13)
br i1 %is.gfx1010., label %if.then4, label %lor.lhs.false3
lor.lhs.false3:
%is.gfx1101. = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata !14)
br i1 %is.gfx1101., label %if.then4, label %if.end5
if.then4:
call addrspace(4) void @llvm.amdgcn.s.ttracedata.imm(i16 1)
br label %if.end5
if.end5:
br label %if.end6
if.end6:
br label %while.cond
while.cond:
%is.gfx1101.7 = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata !14)
br i1 %is.gfx1101.7, label %while.body, label %while.end
while.body:
%4 = load i32, ptr addrspace(1) @p
%add = add i32 %4, %x
store i32 %add, ptr addrspace(1) @p
br label %while.end
while.end:
br label %do.body
do.body:
%7 = load i32, ptr addrspace(1) @p
%sub = sub i32 %7, %x
store i32 %sub, ptr addrspace(1) @p
br label %do.cond
do.cond:
%is.gfx1010.8 = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata !13)
br i1 %is.gfx1010.8, label %land.rhs, label %land.end
land.rhs:
br label %land.end
land.end:
%c = phi i1 [ false, %do.cond ], [ false, %land.rhs ]
br i1 %c, label %do.body, label %do.end
do.end:
br label %for.cond
for.cond:
%is.gfx1201.9 = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata !10)
br i1 %is.gfx1201.9, label %for.body, label %for.end
for.body:
br label %for.end
for.inc:
%9 = load i32, ptr addrspace(1) @p
%inc = add i32 %9, 1
store i32 %inc, ptr addrspace(1) @p
br label %for.cond
for.end:
%has.gfx11-insts. = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata !18)
br i1 %has.gfx11-insts., label %if.then10, label %if.else11
if.then10:
call addrspace(4) void @llvm.amdgcn.s.wait.event.export.ready()
br label %if.end14
if.else11:
%has.gfx10-insts. = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata !19)
br i1 %has.gfx10-insts., label %if.then12, label %if.end13
if.then12:
call addrspace(4) void @llvm.amdgcn.s.ttracedata.imm(i16 1)
br label %if.end13
if.end13:
br label %if.end14
if.end14:
br label %do.body15
do.body15:
%12 = load i32, ptr addrspace(1) @p
%sub16 = sub i32 %12, %x
store i32 %sub16, ptr addrspace(1) @p
br label %do.cond17
do.cond17:
%has.gfx1250-insts. = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata !20)
br i1 %has.gfx1250-insts., label %land.rhs9, label %land.end10
land.rhs9:
br label %land.end10
land.end10:
%c1 = phi i1 [ false, %do.cond17 ], [ false, %land.rhs9 ]
br i1 %c1, label %do.body15, label %do.end18
do.end18:
br label %for.cond19
for.cond19:
%has.gfx11-insts.20 = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata !18)
br i1 %has.gfx11-insts.20, label %for.body21, label %for.end24
for.body21:
br label %for.end24
for.inc22:
%14 = load i32, ptr addrspace(1) @p
%inc23 = add i32 %14, 1
store i32 %inc23, ptr addrspace(1) @p
br label %for.cond19
for.end24:
ret void
}
!9 = !{!"is.gfx950"}
!10 = !{!"is.gfx1201"}
!11 = !{!"has.gfx12-insts"}
!12 = !{!"is.gfx906"}
!13 = !{!"is.gfx1010"}
!14 = !{!"is.gfx1101"}
!18 = !{!"has.gfx11-insts"}
!19 = !{!"has.gfx10-insts"}
!20 = !{!"has.gfx1250-insts"}