[SPIRV][AMDGPU][clang][CodeGen][opt] Add late-resolved feature identifying predicates (#134016)
This change adds two builtins for AMDGPU: - `__builtin_amdgcn_processor_is`, which is similar in observable behaviour with `__builtin_cpu_is`, except that it is never "evaluated" at run time; - `__builtin_amdgcn_is_invocable`, which is behaviourally similar with `__has_builtin`, except that it is not a macro (i.e. not evaluated at preprocessing time). Neither of these are `constexpr`, even though when compiling for concrete (i.e. `gfxXXX` / `gfxXXX-generic`) targets they get evaluated in Clang, so they shouldn't tear the AST too badly / at all for multi-pass compilation cases like HIP. They can only be used in specific contexts (as args to control structures). The motivation for adding these is two-fold: - as a nice to have, it provides an AST-visible way to incorporate architecture specific code, rather than having to rely on macros and the preprocessor, which burn in the choice quite early; - as a must have, it allows featureful AMDGCN flavoured SPIR-V to be produced, where target specific capability is guarded and chosen or discarded when finalising compilation for a concrete target; this is built atop the Speciali\ation Constant concept which is described in the SPIR-V specification under section [2.12 Specialization](https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#_specialization_2) I've tried to keep the overall footprint of the change small. The changes to Sema are a bit unpleasant, but there was a strong desire to have Clang validate these, and to constrain their uses, and this was the most compact solution I could come up with (suggestions welcome). --------- Co-authored-by: Juan Manuel Martinez Caamaño <jmartinezcaamao@gmail.com> Co-authored-by: Voicu <avoicu@amd.com>
This commit is contained in:
parent
5b00cdf8e1
commit
18e6958903
@ -5558,6 +5558,120 @@ If no address spaces names are provided, all address spaces are fenced.
|
||||
__builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup", "local")
|
||||
__builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup", "local", "global")
|
||||
|
||||
__builtin_amdgcn_processor_is and __builtin_amdgcn_is_invocable
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
``__builtin_amdgcn_processor_is`` and ``__builtin_amdgcn_is_invocable`` provide
|
||||
a functional mechanism for programatically querying:
|
||||
|
||||
* the identity of the current target processor;
|
||||
* the capability of the current target processor to invoke a particular builtin.
|
||||
|
||||
**Syntax**:
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
__amdgpu_feature_predicate_t __builtin_amdgcn_processor_is(const char*);
|
||||
__amdgpu_feature_predicate_t __builtin_amdgcn_is_invocable(builtin_name);
|
||||
|
||||
**Example of use**:
|
||||
|
||||
.. code-block:: c++
|
||||
|
||||
if (__builtin_amdgcn_processor_is("gfx1201") ||
|
||||
__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_sleep_var))
|
||||
__builtin_amdgcn_s_sleep_var(x);
|
||||
|
||||
if (!__builtin_amdgcn_processor_is("gfx906"))
|
||||
__builtin_amdgcn_s_wait_event_export_ready();
|
||||
else if (__builtin_amdgcn_processor_is("gfx1010") ||
|
||||
__builtin_amdgcn_processor_is("gfx1101"))
|
||||
__builtin_amdgcn_s_ttracedata_imm(1);
|
||||
|
||||
while (__builtin_amdgcn_processor_is("gfx1101")) *p += x;
|
||||
|
||||
do {
|
||||
break;
|
||||
} while (__builtin_amdgcn_processor_is("gfx1010"));
|
||||
|
||||
for (; __builtin_amdgcn_processor_is("gfx1201"); ++*p) break;
|
||||
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_wait_event_export_ready))
|
||||
__builtin_amdgcn_s_wait_event_export_ready();
|
||||
else if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_ttracedata_imm))
|
||||
__builtin_amdgcn_s_ttracedata_imm(1);
|
||||
|
||||
do {
|
||||
break;
|
||||
} while (
|
||||
__builtin_amdgcn_is_invocable(__builtin_amdgcn_global_load_tr_b64_i32));
|
||||
|
||||
for (; __builtin_amdgcn_is_invocable(__builtin_amdgcn_permlane64); ++*p)
|
||||
break;
|
||||
|
||||
**Description**:
|
||||
|
||||
The builtins return a value of type ``__amdgpu_feature_predicate_t``, which is a
|
||||
target specific type that behaves as if its C++ definition was the following:
|
||||
|
||||
.. code-block:: c++
|
||||
|
||||
struct __amdgpu_feature_predicate_t {
|
||||
__amdgpu_feature_predicate_t() = delete;
|
||||
__amdgpu_feature_predicate_t(const __amdgpu_feature_predicate_t&) = delete;
|
||||
__amdgpu_feature_predicate_t(__amdgpu_feature_predicate_t&&) = delete;
|
||||
|
||||
explicit
|
||||
operator bool() const noexcept;
|
||||
};
|
||||
|
||||
The builtins can be used in C as well, wherein the
|
||||
``__amdgpu_feature_predicate_t`` type behaves as an opaque, forward declared
|
||||
type with conditional automated conversion to ``_Bool`` when used as the
|
||||
predicate argument to a control structure:
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
struct __amdgpu_feature_predicate_t ret(); // Error
|
||||
void arg(struct __amdgpu_feature_predicate_t); // Error
|
||||
void local() {
|
||||
struct __amdgpu_feature_predicate_t x; // Error
|
||||
struct __amdgpu_feature_predicate_t y =
|
||||
__builtin_amdgcn_processor_is("gfx900"); // Error
|
||||
}
|
||||
void valid_use() {
|
||||
_Bool x = (_Bool)__builtin_amdgcn_processor_is("gfx900"); // OK
|
||||
if (__builtin_amdgcn_processor_is("gfx900")) // Implicit cast to _Bool
|
||||
return;
|
||||
for (; __builtin_amdgcn_processor_is("gfx900");) // Implicit cast to _Bool
|
||||
break;
|
||||
while (__builtin_amdgcn_processor_is("gfx900")) // Implicit cast to _Bool
|
||||
break;
|
||||
do {
|
||||
break;
|
||||
} while (__builtin_amdgcn_processor_is("gfx900")); // Implicit cast to _Bool
|
||||
|
||||
__builtin_amdgcn_processor_is("gfx900") ? x : !x;
|
||||
}
|
||||
|
||||
The boolean interpretation of the predicate values returned by the builtins:
|
||||
|
||||
* indicates whether the current target matches the argument; the argument MUST
|
||||
be a string literal and a valid AMDGPU target
|
||||
* indicates whether the builtin function passed as the argument can be invoked
|
||||
by the current target; the argument MUST be either a generic or AMDGPU
|
||||
specific builtin name
|
||||
|
||||
When invoked while compiling for a concrete target, the builtins are evaluated
|
||||
early by Clang, and never produce any CodeGen effects / have no observable
|
||||
side-effects in IR. Conversely, when compiling for AMDGCN flavoured SPIR-v,
|
||||
which is an abstract target, a series of specialization constants are implicitly
|
||||
created, in correspondence with the predicates. These predicates get resolved
|
||||
when finalizing the compilation process for a concrete target, and shall reflect
|
||||
the latter's identity and features. Thus, it is possible to author high-level
|
||||
code, in e.g. HIP, that is target adaptive in a dynamic fashion, contrary to
|
||||
macro based mechanisms.
|
||||
|
||||
__builtin_amdgcn_ballot_w{32,64}
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
|
||||
@ -458,7 +458,10 @@ Target Specific Changes
|
||||
|
||||
AMDGPU Support
|
||||
^^^^^^^^^^^^^^
|
||||
|
||||
- Introduced a new target specific builtin ``__builtin_amdgcn_processor_is``,
|
||||
a late / deferred query for the current target processor.
|
||||
- Introduced a new target specific builtin ``__builtin_amdgcn_is_invocable``,
|
||||
a late / deferred query for the availability of target specific builtins.
|
||||
- Initial support for gfx1310
|
||||
|
||||
NVPTX Support
|
||||
|
||||
@ -20,11 +20,19 @@
|
||||
AMDGPU_TYPE(Name, Id, SingletonId, Width, Align)
|
||||
#endif
|
||||
|
||||
#ifndef AMDGPU_FEATURE_PREDICATE_TYPE
|
||||
#define AMDGPU_FEATURE_PREDICATE_TYPE(Name, Id, SingletonId, Width, Align) \
|
||||
AMDGPU_TYPE(Name, Id, SingletonId, Width, Align)
|
||||
#endif
|
||||
|
||||
AMDGPU_OPAQUE_PTR_TYPE("__amdgpu_buffer_rsrc_t", AMDGPUBufferRsrc, AMDGPUBufferRsrcTy, 128, 128, 8)
|
||||
AMDGPU_OPAQUE_PTR_TYPE("__amdgpu_texture_t", AMDGPUTexture, AMDGPUTextureTy, 256, 256, 0)
|
||||
|
||||
AMDGPU_NAMED_BARRIER_TYPE("__amdgpu_named_workgroup_barrier_t", AMDGPUNamedWorkgroupBarrier, AMDGPUNamedWorkgroupBarrierTy, 128, 32, 0)
|
||||
|
||||
AMDGPU_FEATURE_PREDICATE_TYPE("__amdgpu_feature_predicate_t", AMDGPUFeaturePredicate, AMDGPUFeaturePredicateTy, 1, 1)
|
||||
|
||||
#undef AMDGPU_TYPE
|
||||
#undef AMDGPU_OPAQUE_PTR_TYPE
|
||||
#undef AMDGPU_NAMED_BARRIER_TYPE
|
||||
#undef AMDGPU_FEATURE_PREDICATE_TYPE
|
||||
|
||||
@ -34,6 +34,7 @@
|
||||
// Q -> target builtin type, followed by a character to distinguish the builtin type
|
||||
// Qa -> AArch64 svcount_t builtin type.
|
||||
// Qb -> AMDGPU __amdgpu_buffer_rsrc_t builtin type.
|
||||
// Qc -> AMDGPU __amdgpu_feature_predicate_t builtin type.
|
||||
// Qt -> AMDGPU __amdgpu_texture_t builtin type.
|
||||
// E -> ext_vector, followed by the number of elements and the base type.
|
||||
// X -> _Complex, followed by the base type.
|
||||
|
||||
@ -523,6 +523,11 @@ def __builtin_amdgcn_endpgm : AMDGPUBuiltin<"void()", [NoReturn]>;
|
||||
def __builtin_amdgcn_get_fpenv : AMDGPUBuiltin<"uint64_t()">;
|
||||
def __builtin_amdgcn_set_fpenv : AMDGPUBuiltin<"void(uint64_t)">;
|
||||
|
||||
// These are special FE only builtins intended for forwarding the requirements
|
||||
// to the ME.
|
||||
def __builtin_amdgcn_processor_is : AMDGPUBuiltin<"__amdgpu_feature_predicate_t(char const *)", [NoThrow, Const, CustomTypeChecking, UnevaluatedArguments]>;
|
||||
def __builtin_amdgcn_is_invocable : AMDGPUBuiltin<"__amdgpu_feature_predicate_t()", [NoThrow, Const, CustomTypeChecking, UnevaluatedArguments]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Wave Reduction builtins.
|
||||
|
||||
@ -1927,3 +1927,6 @@ def TrivialAutoVarInit : DiagGroup<"trivial-auto-var-init">;
|
||||
|
||||
// A warning for options that enable a feature that is not yet complete
|
||||
def ExperimentalOption : DiagGroup<"experimental-option">;
|
||||
|
||||
// Warnings about unguarded usages of AMDGPU target specific constructs
|
||||
def UnguardedBuiltinUsageAMDGPU : DiagGroup<"amdgpu-unguarded-builtin-usage">;
|
||||
|
||||
@ -14117,6 +14117,47 @@ def note_acc_reduction_combiner_forming
|
||||
// AMDGCN builtins diagnostics
|
||||
def err_amdgcn_load_lds_size_invalid_value : Error<"invalid size value">;
|
||||
def note_amdgcn_load_lds_size_valid_value : Note<"size must be %select{1, 2, or 4|1, 2, 4, 12 or 16}0">;
|
||||
def err_amdgcn_processor_is_arg_not_literal
|
||||
: Error<"the argument to __builtin_amdgcn_processor_is must be a string "
|
||||
"literal">;
|
||||
def err_amdgcn_processor_is_arg_invalid_value
|
||||
: Error<"the argument to __builtin_amdgcn_processor_is must be a valid "
|
||||
"AMDGCN processor identifier; '%0' is not valid">;
|
||||
def note_amdgcn_processor_is_valid_options
|
||||
: Note<"valid AMDGCN processor identifiers are: %0">;
|
||||
def err_amdgcn_is_invocable_arg_invalid_value
|
||||
: Error<"the argument to __builtin_amdgcn_is_invocable must be either a "
|
||||
"target agnostic builtin or an AMDGCN target specific builtin; '%0'"
|
||||
" is not valid">;
|
||||
def err_amdgcn_predicate_type_is_not_constructible
|
||||
: Error<"%0 has type __amdgpu_feature_predicate_t, which is not"
|
||||
" constructible">;
|
||||
def err_amdgcn_predicate_type_needs_explicit_bool_cast
|
||||
: Error<"%0 must be explicitly cast to %1; however, please note that this "
|
||||
"is almost always an error and that it prevents the effective "
|
||||
"guarding of target dependent code, and thus should be avoided">;
|
||||
def note_amdgcn_protected_by_predicate : Note<"jump enters statement controlled"
|
||||
" by AMDGPU feature predicate">;
|
||||
def err_amdgcn_conflicting_is_processor_options
|
||||
: Error<"conflicting check for AMDGCN processor %0 found in a scope already"
|
||||
" controlled by a check for AMDGCN processor">;
|
||||
def note_amdgcn_previous_is_processor_guard
|
||||
: Note<"predicate guard, with establishes the context, inserted here">;
|
||||
def warn_amdgcn_unguarded_asm_stmt
|
||||
: Warning<"the '%0' ASM sequence might be invalid for some AMDGPU targets">,
|
||||
InGroup<UnguardedBuiltinUsageAMDGPU>, DefaultIgnore;
|
||||
def note_amdgcn_unguarded_asm_silence
|
||||
: Note<"enclose the '%0' ASM sequence in a scope controlled by a "
|
||||
"__builtin_amdgcn_is_processor check to silence this warning">;
|
||||
def err_amdgcn_incompatible_builtin
|
||||
: Error<"%0 cannot be invoked in the current context, as it requires the "
|
||||
"'%1' feature(s)%select{|, which '%3' does not provide}2">;
|
||||
def warn_amdgcn_unguarded_builtin :
|
||||
Warning<"%0 might be unavailable on some AMDGPU targets">,
|
||||
InGroup<UnguardedBuiltinUsageAMDGPU>, DefaultIgnore;
|
||||
def note_amdgcn_unguarded_builtin_silence
|
||||
: Note<"enclose %0 in a __builtin_amdgcn_is_invocable check to silence "
|
||||
"this warning">;
|
||||
|
||||
def err_amdgcn_coop_atomic_invalid_as : Error<"cooperative atomic requires a global or generic pointer">;
|
||||
|
||||
|
||||
@ -15,12 +15,17 @@
|
||||
|
||||
#include "clang/AST/ASTFwd.h"
|
||||
#include "clang/Sema/SemaBase.h"
|
||||
#include "llvm/ADT/SmallPtrSet.h"
|
||||
|
||||
namespace clang {
|
||||
class AttributeCommonInfo;
|
||||
class Expr;
|
||||
class ParsedAttr;
|
||||
|
||||
class SemaAMDGPU : public SemaBase {
|
||||
llvm::SmallPtrSet<Expr *, 32> ExpandedPredicates;
|
||||
llvm::SmallPtrSet<FunctionDecl *, 32> PotentiallyUnguardedBuiltinUsers;
|
||||
|
||||
public:
|
||||
SemaAMDGPU(Sema &S);
|
||||
|
||||
@ -73,6 +78,16 @@ public:
|
||||
void handleAMDGPUNumVGPRAttr(Decl *D, const ParsedAttr &AL);
|
||||
void handleAMDGPUMaxNumWorkGroupsAttr(Decl *D, const ParsedAttr &AL);
|
||||
void handleAMDGPUFlatWorkGroupSizeAttr(Decl *D, const ParsedAttr &AL);
|
||||
|
||||
/// Expand a valid use of the feature identification builtins into its
|
||||
/// corresponding sequence of instructions.
|
||||
Expr *ExpandAMDGPUPredicateBuiltIn(Expr *CE);
|
||||
bool IsPredicate(Expr *E) const;
|
||||
/// Diagnose unguarded usages of AMDGPU builtins and recommend guarding with
|
||||
/// __builtin_amdgcn_is_invocable
|
||||
void AddPotentiallyUnguardedBuiltinUser(FunctionDecl *FD);
|
||||
bool HasPotentiallyUnguardedBuiltinUsage(FunctionDecl *FD) const;
|
||||
void DiagnoseUnguardedBuiltinUsage(FunctionDecl *FD);
|
||||
};
|
||||
} // namespace clang
|
||||
|
||||
|
||||
@ -1166,7 +1166,7 @@ enum PredefinedTypeIDs {
|
||||
///
|
||||
/// Type IDs for non-predefined types will start at
|
||||
/// NUM_PREDEF_TYPE_IDs.
|
||||
const unsigned NUM_PREDEF_TYPE_IDS = 514;
|
||||
const unsigned NUM_PREDEF_TYPE_IDS = 515;
|
||||
|
||||
// Ensure we do not overrun the predefined types we reserved
|
||||
// in the enum PredefinedTypeIDs above.
|
||||
|
||||
@ -1426,7 +1426,12 @@ void ASTContext::InitBuiltinTypes(const TargetInfo &Target,
|
||||
}
|
||||
|
||||
if (Target.getTriple().isAMDGPU() ||
|
||||
(AuxTarget && AuxTarget->getTriple().isAMDGPU())) {
|
||||
(Target.getTriple().isSPIRV() &&
|
||||
Target.getTriple().getVendor() == llvm::Triple::AMD) ||
|
||||
(AuxTarget &&
|
||||
(AuxTarget->getTriple().isAMDGPU() ||
|
||||
((AuxTarget->getTriple().isSPIRV() &&
|
||||
AuxTarget->getTriple().getVendor() == llvm::Triple::AMD))))) {
|
||||
#define AMDGPU_TYPE(Name, Id, SingletonId, Width, Align) \
|
||||
InitBuiltinType(SingletonId, BuiltinType::Id);
|
||||
#include "clang/Basic/AMDGPUTypes.def"
|
||||
@ -12739,6 +12744,10 @@ static QualType DecodeTypeFromStr(const char *&Str, const ASTContext &Context,
|
||||
Type = Context.AMDGPUBufferRsrcTy;
|
||||
break;
|
||||
}
|
||||
case 'c': {
|
||||
Type = Context.AMDGPUFeaturePredicateTy;
|
||||
break;
|
||||
}
|
||||
case 't': {
|
||||
Type = Context.AMDGPUTextureTy;
|
||||
break;
|
||||
|
||||
@ -2585,6 +2585,8 @@ bool Type::isSizelessBuiltinType() const {
|
||||
// HLSL intangible types
|
||||
#define HLSL_INTANGIBLE_TYPE(Name, Id, SingletonId) case BuiltinType::Id:
|
||||
#include "clang/Basic/HLSLIntangibleTypes.def"
|
||||
// AMDGPU feature predicate type
|
||||
case BuiltinType::AMDGPUFeaturePredicate:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
|
||||
@ -181,3 +181,12 @@ void SPIRV64AMDGCNTargetInfo::setAuxTarget(const TargetInfo *Aux) {
|
||||
Float128Format = DoubleFormat;
|
||||
}
|
||||
}
|
||||
|
||||
bool SPIRV64AMDGCNTargetInfo::isValidCPUName(StringRef CPU) const {
|
||||
return AMDGPUTI.isValidCPUName(CPU);
|
||||
}
|
||||
|
||||
void SPIRV64AMDGCNTargetInfo::fillValidCPUList(
|
||||
SmallVectorImpl<StringRef> &Values) const {
|
||||
return AMDGPUTI.fillValidCPUList(Values);
|
||||
}
|
||||
|
||||
@ -480,6 +480,11 @@ public:
|
||||
}
|
||||
|
||||
bool hasInt128Type() const override { return TargetInfo::hasInt128Type(); }
|
||||
|
||||
// This is only needed for validating arguments passed to
|
||||
// __builtin_amdgcn_processor_is
|
||||
bool isValidCPUName(StringRef Name) const override;
|
||||
void fillValidCPUList(SmallVectorImpl<StringRef> &Values) const override;
|
||||
};
|
||||
|
||||
class LLVM_LIBRARY_VISIBILITY SPIRV64IntelTargetInfo final
|
||||
|
||||
@ -1105,6 +1105,13 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) {
|
||||
DBuilder.createBasicType(Name, Width, llvm::dwarf::DW_ATE_unsigned); \
|
||||
return SingletonId; \
|
||||
}
|
||||
#define AMDGPU_FEATURE_PREDICATE_TYPE(Name, Id, SingletonId, Width, Align) \
|
||||
case BuiltinType::Id: { \
|
||||
if (!SingletonId) \
|
||||
SingletonId = \
|
||||
DBuilder.createBasicType(Name, Width, llvm::dwarf::DW_ATE_boolean); \
|
||||
return SingletonId; \
|
||||
}
|
||||
#include "clang/Basic/AMDGPUTypes.def"
|
||||
case BuiltinType::UChar:
|
||||
case BuiltinType::Char_U:
|
||||
|
||||
@ -1040,6 +1040,10 @@ Value *ScalarExprEmitter::EmitConversionToBool(Value *Src, QualType SrcType) {
|
||||
if (const MemberPointerType *MPT = dyn_cast<MemberPointerType>(SrcType))
|
||||
return CGF.CGM.getCXXABI().EmitMemberPointerIsNotNull(CGF, Src, MPT);
|
||||
|
||||
// The conversion is a NOP, and will be done when CodeGening the builtin.
|
||||
if (SrcType == CGF.getContext().AMDGPUFeaturePredicateTy)
|
||||
return Src;
|
||||
|
||||
assert((SrcType->isIntegerType() || isa<llvm::PointerType>(Src->getType())) &&
|
||||
"Unknown scalar type to convert");
|
||||
|
||||
|
||||
@ -610,6 +610,9 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) {
|
||||
case BuiltinType::Id: \
|
||||
return llvm::TargetExtType::get(getLLVMContext(), "amdgcn.named.barrier", \
|
||||
{}, {Scope});
|
||||
#define AMDGPU_FEATURE_PREDICATE_TYPE(Name, Id, SingletonId, Width, Align) \
|
||||
case BuiltinType::Id: \
|
||||
return ConvertType(getContext().getLogicalOperationType());
|
||||
#include "clang/Basic/AMDGPUTypes.def"
|
||||
#define HLSL_INTANGIBLE_TYPE(Name, Id, SingletonId) case BuiltinType::Id:
|
||||
#include "clang/Basic/HLSLIntangibleTypes.def"
|
||||
|
||||
@ -20,6 +20,7 @@
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/IR/IntrinsicsAMDGPU.h"
|
||||
#include "llvm/IR/IntrinsicsR600.h"
|
||||
#include "llvm/IR/IntrinsicsSPIRV.h"
|
||||
#include "llvm/IR/MemoryModelRelaxationAnnotations.h"
|
||||
#include "llvm/Support/AMDGPUAddrSpace.h"
|
||||
#include "llvm/Support/AtomicOrdering.h"
|
||||
@ -458,6 +459,19 @@ void CodeGenFunction::AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst,
|
||||
Inst->setMetadata(LLVMContext::MD_mmra, MMRAMetadata::getMD(Ctx, MMRAs));
|
||||
}
|
||||
|
||||
static Value *GetAMDGPUPredicate(CodeGenFunction &CGF, Twine Name) {
|
||||
Constant *SpecId = ConstantInt::getAllOnesValue(CGF.Int32Ty);
|
||||
|
||||
LLVMContext &Ctx = CGF.getLLVMContext();
|
||||
MDNode *Predicate = MDNode::get(Ctx, MDString::get(Ctx, Name.str()));
|
||||
std::vector<Value *> Args = {SpecId, ConstantInt::getFalse(Ctx),
|
||||
MetadataAsValue::get(Ctx, Predicate)};
|
||||
CallInst *Call = CGF.Builder.CreateIntrinsic(
|
||||
Intrinsic::spv_named_boolean_spec_constant, Args);
|
||||
|
||||
return Call;
|
||||
}
|
||||
|
||||
static Intrinsic::ID getIntrinsicIDforWaveReduction(unsigned BuiltinID) {
|
||||
switch (BuiltinID) {
|
||||
default:
|
||||
@ -1014,6 +1028,23 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
|
||||
llvm::Value *Env = EmitScalarExpr(E->getArg(0));
|
||||
return Builder.CreateCall(F, {Env});
|
||||
}
|
||||
case AMDGPU::BI__builtin_amdgcn_processor_is: {
|
||||
assert(CGM.getTriple().isSPIRV() &&
|
||||
"__builtin_amdgcn_processor_is should never reach CodeGen for "
|
||||
"concrete targets!");
|
||||
StringRef Proc = cast<clang::StringLiteral>(E->getArg(0))->getString();
|
||||
return GetAMDGPUPredicate(*this, "is." + Proc);
|
||||
}
|
||||
case AMDGPU::BI__builtin_amdgcn_is_invocable: {
|
||||
assert(CGM.getTriple().isSPIRV() &&
|
||||
"__builtin_amdgcn_is_invocable should never reach CodeGen for "
|
||||
"concrete targets!");
|
||||
auto *FD = cast<FunctionDecl>(
|
||||
cast<DeclRefExpr>(E->getArg(0))->getReferencedDeclOfCallee());
|
||||
StringRef RF =
|
||||
getContext().BuiltinInfo.getRequiredFeatures(FD->getBuiltinID());
|
||||
return GetAMDGPUPredicate(*this, "has." + RF);
|
||||
}
|
||||
case AMDGPU::BI__builtin_amdgcn_read_exec:
|
||||
return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, false);
|
||||
case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
|
||||
|
||||
@ -19,6 +19,7 @@
|
||||
#include "clang/AST/StmtOpenACC.h"
|
||||
#include "clang/AST/StmtOpenMP.h"
|
||||
#include "clang/Basic/SourceLocation.h"
|
||||
#include "clang/Sema/SemaAMDGPU.h"
|
||||
#include "clang/Sema/SemaInternal.h"
|
||||
#include "llvm/ADT/BitVector.h"
|
||||
using namespace clang;
|
||||
@ -369,8 +370,10 @@ void JumpScopeChecker::BuildScopeInformation(Stmt *S,
|
||||
|
||||
case Stmt::IfStmtClass: {
|
||||
IfStmt *IS = cast<IfStmt>(S);
|
||||
bool AMDGPUPredicate = false;
|
||||
if (!(IS->isConstexpr() || IS->isConsteval() ||
|
||||
IS->isObjCAvailabilityCheck()))
|
||||
IS->isObjCAvailabilityCheck() ||
|
||||
(AMDGPUPredicate = this->S.AMDGPU().IsPredicate(IS->getCond()))))
|
||||
break;
|
||||
|
||||
unsigned Diag = diag::note_protected_by_if_available;
|
||||
@ -378,6 +381,8 @@ void JumpScopeChecker::BuildScopeInformation(Stmt *S,
|
||||
Diag = diag::note_protected_by_constexpr_if;
|
||||
else if (IS->isConsteval())
|
||||
Diag = diag::note_protected_by_consteval_if;
|
||||
else if (AMDGPUPredicate)
|
||||
Diag = diag::note_amdgcn_protected_by_predicate;
|
||||
|
||||
if (VarDecl *Var = IS->getConditionVariable())
|
||||
BuildScopeInformation(Var, ParentScope);
|
||||
|
||||
@ -568,8 +568,13 @@ void Sema::Initialize() {
|
||||
}
|
||||
|
||||
if (Context.getTargetInfo().getTriple().isAMDGPU() ||
|
||||
(Context.getTargetInfo().getTriple().isSPIRV() &&
|
||||
Context.getTargetInfo().getTriple().getVendor() == llvm::Triple::AMD) ||
|
||||
(Context.getAuxTargetInfo() &&
|
||||
Context.getAuxTargetInfo()->getTriple().isAMDGPU())) {
|
||||
(Context.getAuxTargetInfo()->getTriple().isAMDGPU() ||
|
||||
(Context.getAuxTargetInfo()->getTriple().isSPIRV() &&
|
||||
Context.getAuxTargetInfo()->getTriple().getVendor() ==
|
||||
llvm::Triple::AMD)))) {
|
||||
#define AMDGPU_TYPE(Name, Id, SingletonId, Width, Align) \
|
||||
addImplicitTypedef(Name, Context.SingletonId);
|
||||
#include "clang/Basic/AMDGPUTypes.def"
|
||||
|
||||
@ -11,14 +11,24 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "clang/Sema/SemaAMDGPU.h"
|
||||
#include "clang/AST/Decl.h"
|
||||
#include "clang/AST/DynamicRecursiveASTVisitor.h"
|
||||
#include "clang/AST/Expr.h"
|
||||
#include "clang/Basic/DiagnosticFrontend.h"
|
||||
#include "clang/Basic/DiagnosticSema.h"
|
||||
#include "clang/Basic/TargetBuiltins.h"
|
||||
#include "clang/Basic/TargetInfo.h"
|
||||
#include "clang/Sema/Ownership.h"
|
||||
#include "clang/Sema/Scope.h"
|
||||
#include "clang/Sema/Sema.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/ADT/StringExtras.h"
|
||||
#include "llvm/ADT/StringMap.h"
|
||||
#include "llvm/Support/AMDGPUAddrSpace.h"
|
||||
#include "llvm/Support/AtomicOrdering.h"
|
||||
#include "llvm/TargetParser/TargetParser.h"
|
||||
#include <cstdint>
|
||||
#include <utility>
|
||||
|
||||
namespace clang {
|
||||
|
||||
@ -742,4 +752,298 @@ void SemaAMDGPU::handleAMDGPUMaxNumWorkGroupsAttr(Decl *D,
|
||||
addAMDGPUMaxNumWorkGroupsAttr(D, AL, AL.getArgAsExpr(0), YExpr, ZExpr);
|
||||
}
|
||||
|
||||
Expr *SemaAMDGPU::ExpandAMDGPUPredicateBuiltIn(Expr *E) {
|
||||
CallExpr *CE = cast<CallExpr>(E->IgnoreParens());
|
||||
ASTContext &Ctx = getASTContext();
|
||||
QualType BoolTy = Ctx.getLogicalOperationType();
|
||||
llvm::APInt False = llvm::APInt::getZero(Ctx.getIntWidth(BoolTy));
|
||||
llvm::APInt True = llvm::APInt::getAllOnes(Ctx.getIntWidth(BoolTy));
|
||||
SourceLocation Loc = CE->getExprLoc();
|
||||
|
||||
if (!CE->getBuiltinCallee())
|
||||
return *ExpandedPredicates
|
||||
.insert(IntegerLiteral::Create(Ctx, False, BoolTy, Loc))
|
||||
.first;
|
||||
|
||||
bool P = false;
|
||||
unsigned BI = CE->getBuiltinCallee();
|
||||
if (Ctx.BuiltinInfo.isAuxBuiltinID(BI))
|
||||
BI = Ctx.BuiltinInfo.getAuxBuiltinID(BI);
|
||||
|
||||
if (BI == AMDGPU::BI__builtin_amdgcn_processor_is) {
|
||||
auto *GFX = dyn_cast<StringLiteral>(CE->getArg(0)->IgnoreParenCasts());
|
||||
if (!GFX) {
|
||||
Diag(Loc, diag::err_amdgcn_processor_is_arg_not_literal);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
StringRef N = GFX->getString();
|
||||
const TargetInfo &TI = Ctx.getTargetInfo();
|
||||
const TargetInfo *AuxTI = Ctx.getAuxTargetInfo();
|
||||
if (!TI.isValidCPUName(N) && (!AuxTI || !AuxTI->isValidCPUName(N))) {
|
||||
Diag(Loc, diag::err_amdgcn_processor_is_arg_invalid_value) << N;
|
||||
SmallVector<StringRef, 32> ValidList;
|
||||
if (TI.getTriple().getVendor() == llvm::Triple::VendorType::AMD)
|
||||
TI.fillValidCPUList(ValidList);
|
||||
else if (AuxTI) // Since the BI is present it must be an AMDGPU triple.
|
||||
AuxTI->fillValidCPUList(ValidList);
|
||||
if (!ValidList.empty())
|
||||
Diag(Loc, diag::note_amdgcn_processor_is_valid_options)
|
||||
<< llvm::join(ValidList, ", ");
|
||||
return nullptr;
|
||||
}
|
||||
if (Ctx.getTargetInfo().getTriple().isSPIRV()) {
|
||||
CE->setType(BoolTy);
|
||||
return *ExpandedPredicates.insert(CE).first;
|
||||
}
|
||||
|
||||
if (auto TID = Ctx.getTargetInfo().getTargetID())
|
||||
P = TID->find(N) == 0;
|
||||
} else {
|
||||
Expr *Arg = CE->getArg(0);
|
||||
if (!Arg || Arg->getType() != Ctx.BuiltinFnTy) {
|
||||
Diag(Loc, diag::err_amdgcn_is_invocable_arg_invalid_value) << Arg;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if (Ctx.getTargetInfo().getTriple().isSPIRV()) {
|
||||
CE->setType(BoolTy);
|
||||
return *ExpandedPredicates.insert(CE).first;
|
||||
}
|
||||
|
||||
auto *FD = cast<FunctionDecl>(Arg->getReferencedDeclOfCallee());
|
||||
|
||||
StringRef RF = Ctx.BuiltinInfo.getRequiredFeatures(FD->getBuiltinID());
|
||||
llvm::StringMap<bool> CF;
|
||||
Ctx.getFunctionFeatureMap(CF, FD);
|
||||
|
||||
P = Builtin::evaluateRequiredTargetFeatures(RF, CF);
|
||||
}
|
||||
|
||||
return *ExpandedPredicates
|
||||
.insert(
|
||||
IntegerLiteral::Create(Ctx, P ? True : False, BoolTy, Loc))
|
||||
.first;
|
||||
}
|
||||
|
||||
bool SemaAMDGPU::IsPredicate(Expr *E) const {
|
||||
return ExpandedPredicates.contains(E);
|
||||
}
|
||||
|
||||
void SemaAMDGPU::AddPotentiallyUnguardedBuiltinUser(FunctionDecl *FD) {
|
||||
PotentiallyUnguardedBuiltinUsers.insert(FD);
|
||||
}
|
||||
|
||||
bool SemaAMDGPU::HasPotentiallyUnguardedBuiltinUsage(FunctionDecl *FD) const {
|
||||
return PotentiallyUnguardedBuiltinUsers.contains(FD);
|
||||
}
|
||||
|
||||
namespace {
|
||||
/// This class implements -Wamdgpu-unguarded-builtin-usage.
|
||||
///
|
||||
/// This is done with a traversal of the AST of a function that includes a
|
||||
/// call to a target specific builtin. Whenever we encounter an \c if of the
|
||||
/// form: \c if(__builtin_amdgcn_is_invocable), we consider the then statement
|
||||
/// guarded.
|
||||
class DiagnoseUnguardedBuiltins : public DynamicRecursiveASTVisitor {
|
||||
// TODO: this could eventually be extended to consider what happens when there
|
||||
// are multiple target architectures specified via target("arch=gfxXXX")
|
||||
// target("arch=gfxyyy") etc., as well as feature disabling via "-XXX".
|
||||
Sema &SemaRef;
|
||||
|
||||
SmallVector<StringRef> TargetFeatures;
|
||||
SmallVector<std::pair<SourceLocation, StringRef>> CurrentGFXIP;
|
||||
SmallVector<unsigned> GuardedBuiltins;
|
||||
|
||||
static Expr *FindPredicate(Expr *Cond) {
|
||||
if (auto *CE = dyn_cast<CallExpr>(Cond)) {
|
||||
if (CE->getBuiltinCallee() == AMDGPU::BI__builtin_amdgcn_is_invocable ||
|
||||
CE->getBuiltinCallee() == AMDGPU::BI__builtin_amdgcn_processor_is)
|
||||
return Cond;
|
||||
} else if (auto *UO = dyn_cast<UnaryOperator>(Cond)) {
|
||||
return FindPredicate(UO->getSubExpr());
|
||||
} else if (auto *BO = dyn_cast<BinaryOperator>(Cond)) {
|
||||
if ((Cond = FindPredicate(BO->getLHS())))
|
||||
return Cond;
|
||||
return FindPredicate(BO->getRHS());
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
bool EnterPredicateGuardedContext(CallExpr *P);
|
||||
void ExitPredicateGuardedContext(bool WasProcessorCheck);
|
||||
bool TraverseGuardedStmt(Stmt *S, CallExpr *P);
|
||||
|
||||
public:
|
||||
DiagnoseUnguardedBuiltins(Sema &SemaRef) : SemaRef(SemaRef) {
|
||||
if (auto *TAT = SemaRef.getCurFunctionDecl(true)->getAttr<TargetAttr>()) {
|
||||
// We use the somewhat misnamed x86 accessors because they provide exactly
|
||||
// what we require.
|
||||
TAT->getX86AddedFeatures(TargetFeatures);
|
||||
if (auto GFXIP = TAT->getX86Architecture())
|
||||
CurrentGFXIP.emplace_back(TAT->getLocation(), *GFXIP);
|
||||
}
|
||||
}
|
||||
|
||||
bool TraverseLambdaExpr(LambdaExpr *LE) override {
|
||||
if (SemaRef.AMDGPU().HasPotentiallyUnguardedBuiltinUsage(
|
||||
LE->getCallOperator()))
|
||||
return true; // We have already handled this.
|
||||
return DynamicRecursiveASTVisitor::TraverseLambdaExpr(LE);
|
||||
}
|
||||
|
||||
bool TraverseStmt(Stmt *S) override {
|
||||
if (!S)
|
||||
return true;
|
||||
return DynamicRecursiveASTVisitor::TraverseStmt(S);
|
||||
}
|
||||
|
||||
void IssueDiagnostics(Stmt *S) { TraverseStmt(S); }
|
||||
|
||||
bool TraverseIfStmt(IfStmt *If) override {
|
||||
if (auto *CE = dyn_cast_or_null<CallExpr>(FindPredicate(If->getCond())))
|
||||
return TraverseGuardedStmt(If, CE);
|
||||
return DynamicRecursiveASTVisitor::TraverseIfStmt(If);
|
||||
}
|
||||
|
||||
bool TraverseCaseStmt(CaseStmt *CS) override {
|
||||
return TraverseStmt(CS->getSubStmt());
|
||||
}
|
||||
|
||||
bool TraverseConditionalOperator(ConditionalOperator *CO) override {
|
||||
if (auto *CE = dyn_cast_or_null<CallExpr>(FindPredicate(CO->getCond())))
|
||||
return TraverseGuardedStmt(CO, CE);
|
||||
return DynamicRecursiveASTVisitor::TraverseConditionalOperator(CO);
|
||||
}
|
||||
|
||||
bool VisitAsmStmt(AsmStmt *ASM) override;
|
||||
bool VisitCallExpr(CallExpr *CE) override;
|
||||
};
|
||||
|
||||
bool DiagnoseUnguardedBuiltins::EnterPredicateGuardedContext(CallExpr *P) {
|
||||
bool IsProcessorCheck =
|
||||
P->getBuiltinCallee() == AMDGPU::BI__builtin_amdgcn_processor_is;
|
||||
|
||||
if (IsProcessorCheck) {
|
||||
StringRef G = cast<clang::StringLiteral>(P->getArg(0))->getString();
|
||||
// TODO: handle generic ISAs.
|
||||
if (!CurrentGFXIP.empty() && G != CurrentGFXIP.back().second) {
|
||||
SemaRef.Diag(P->getExprLoc(),
|
||||
diag::err_amdgcn_conflicting_is_processor_options)
|
||||
<< P;
|
||||
SemaRef.Diag(CurrentGFXIP.back().first,
|
||||
diag::note_amdgcn_previous_is_processor_guard);
|
||||
}
|
||||
CurrentGFXIP.emplace_back(P->getExprLoc(), G);
|
||||
} else {
|
||||
auto *FD = cast<FunctionDecl>(
|
||||
cast<DeclRefExpr>(P->getArg(0))->getReferencedDeclOfCallee());
|
||||
GuardedBuiltins.push_back(FD->getBuiltinID());
|
||||
}
|
||||
|
||||
return IsProcessorCheck;
|
||||
}
|
||||
|
||||
void DiagnoseUnguardedBuiltins::ExitPredicateGuardedContext(bool WasProcCheck) {
|
||||
if (WasProcCheck)
|
||||
CurrentGFXIP.pop_back();
|
||||
else
|
||||
GuardedBuiltins.pop_back();
|
||||
}
|
||||
|
||||
inline std::pair<Stmt *, Stmt *> GetTraversalOrder(Stmt *S) {
|
||||
std::pair<Stmt *, Stmt *> Ordered;
|
||||
Expr *Condition = nullptr;
|
||||
|
||||
if (auto *CO = dyn_cast<ConditionalOperator>(S)) {
|
||||
Condition = CO->getCond();
|
||||
Ordered = {CO->getTrueExpr(), CO->getFalseExpr()};
|
||||
} else if (auto *If = dyn_cast<IfStmt>(S)) {
|
||||
Condition = If->getCond();
|
||||
Ordered = {If->getThen(), If->getElse()};
|
||||
}
|
||||
|
||||
if (auto *UO = dyn_cast<UnaryOperator>(Condition))
|
||||
if (UO->getOpcode() == UnaryOperatorKind::UO_LNot)
|
||||
std::swap(Ordered.first, Ordered.second);
|
||||
|
||||
return Ordered;
|
||||
}
|
||||
|
||||
bool DiagnoseUnguardedBuiltins::TraverseGuardedStmt(Stmt *S, CallExpr *P) {
|
||||
assert(S && "Unexpected missing Statement!");
|
||||
assert(P && "Unexpected missing Predicate!");
|
||||
|
||||
auto [Guarded, Unguarded] = GetTraversalOrder(S);
|
||||
|
||||
bool WasProcessorCheck = EnterPredicateGuardedContext(P);
|
||||
|
||||
bool Continue = TraverseStmt(Guarded);
|
||||
|
||||
ExitPredicateGuardedContext(WasProcessorCheck);
|
||||
|
||||
return Continue && TraverseStmt(Unguarded);
|
||||
}
|
||||
|
||||
bool DiagnoseUnguardedBuiltins::VisitAsmStmt(AsmStmt *ASM) {
|
||||
// TODO: should we check if the ASM is valid for the target? Can we?
|
||||
if (!CurrentGFXIP.empty())
|
||||
return true;
|
||||
|
||||
std::string S = ASM->generateAsmString(SemaRef.getASTContext());
|
||||
SemaRef.Diag(ASM->getAsmLoc(), diag::warn_amdgcn_unguarded_asm_stmt) << S;
|
||||
SemaRef.Diag(ASM->getAsmLoc(), diag::note_amdgcn_unguarded_asm_silence) << S;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool DiagnoseUnguardedBuiltins::VisitCallExpr(CallExpr *CE) {
|
||||
unsigned ID = CE->getBuiltinCallee();
|
||||
Builtin::Context &BInfo = SemaRef.getASTContext().BuiltinInfo;
|
||||
|
||||
if (!ID)
|
||||
return true;
|
||||
if (!BInfo.isTSBuiltin(ID))
|
||||
return true;
|
||||
if (ID == AMDGPU::BI__builtin_amdgcn_processor_is ||
|
||||
ID == AMDGPU::BI__builtin_amdgcn_is_invocable)
|
||||
return true;
|
||||
if (llvm::find(GuardedBuiltins, ID) != GuardedBuiltins.end())
|
||||
return true;
|
||||
|
||||
StringRef FL(BInfo.getRequiredFeatures(ID));
|
||||
llvm::StringMap<bool> FeatureMap;
|
||||
if (CurrentGFXIP.empty()) {
|
||||
for (auto &&F : TargetFeatures)
|
||||
FeatureMap[F] = true;
|
||||
for (auto &&GID : GuardedBuiltins)
|
||||
for (auto &&F : llvm::split(BInfo.getRequiredFeatures(GID), ','))
|
||||
FeatureMap[F] = true;
|
||||
} else {
|
||||
static const llvm::Triple AMDGCN("amdgcn-amd-amdhsa");
|
||||
llvm::AMDGPU::fillAMDGPUFeatureMap(CurrentGFXIP.back().second, AMDGCN,
|
||||
FeatureMap);
|
||||
}
|
||||
|
||||
FunctionDecl *BI = CE->getDirectCallee();
|
||||
SourceLocation BICallLoc = CE->getExprLoc();
|
||||
if (Builtin::evaluateRequiredTargetFeatures(FL, FeatureMap)) {
|
||||
SemaRef.Diag(BICallLoc, diag::warn_amdgcn_unguarded_builtin) << BI;
|
||||
SemaRef.Diag(BICallLoc, diag::note_amdgcn_unguarded_builtin_silence) << BI;
|
||||
} else {
|
||||
StringRef GFXIP = CurrentGFXIP.empty() ? "" : CurrentGFXIP.back().second;
|
||||
SemaRef.Diag(BICallLoc, diag::err_amdgcn_incompatible_builtin)
|
||||
<< BI << FL << !CurrentGFXIP.empty() << GFXIP;
|
||||
if (!CurrentGFXIP.empty())
|
||||
SemaRef.Diag(CurrentGFXIP.back().first,
|
||||
diag::note_amdgcn_previous_is_processor_guard);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
} // Unnamed namespace
|
||||
|
||||
void SemaAMDGPU::DiagnoseUnguardedBuiltinUsage(FunctionDecl *FD) {
|
||||
DiagnoseUnguardedBuiltins(SemaRef).IssueDiagnostics(FD->getBody());
|
||||
}
|
||||
} // namespace clang
|
||||
|
||||
@ -23,6 +23,7 @@
|
||||
#include "clang/Basic/TargetInfo.h"
|
||||
#include "clang/Lex/Preprocessor.h"
|
||||
#include "clang/Sema/Initialization.h"
|
||||
#include "clang/Sema/SemaAMDGPU.h"
|
||||
#include "clang/Sema/SemaHLSL.h"
|
||||
#include "clang/Sema/SemaObjC.h"
|
||||
#include "clang/Sema/SemaRISCV.h"
|
||||
@ -1592,6 +1593,13 @@ static TryCastResult TryStaticCast(Sema &Self, ExprResult &SrcExpr,
|
||||
return TC_Success;
|
||||
}
|
||||
|
||||
if (SrcType == Self.Context.AMDGPUFeaturePredicateTy &&
|
||||
DestType == Self.Context.getLogicalOperationType()) {
|
||||
SrcExpr = Self.AMDGPU().ExpandAMDGPUPredicateBuiltIn(SrcExpr.get());
|
||||
Kind = CK_NoOp;
|
||||
return TC_Success;
|
||||
}
|
||||
|
||||
// We tried everything. Everything! Nothing works! :-(
|
||||
return TC_NotApplicable;
|
||||
}
|
||||
|
||||
@ -47,6 +47,7 @@
|
||||
#include "clang/Sema/ParsedTemplate.h"
|
||||
#include "clang/Sema/Scope.h"
|
||||
#include "clang/Sema/ScopeInfo.h"
|
||||
#include "clang/Sema/SemaAMDGPU.h"
|
||||
#include "clang/Sema/SemaARM.h"
|
||||
#include "clang/Sema/SemaCUDA.h"
|
||||
#include "clang/Sema/SemaHLSL.h"
|
||||
@ -13940,6 +13941,16 @@ void Sema::AddInitializerToDecl(Decl *RealDecl, Expr *Init, bool DirectInit) {
|
||||
return;
|
||||
}
|
||||
|
||||
// __amdgpu_feature_predicate_t cannot be initialised
|
||||
if (VDecl->getType().getDesugaredType(Context) ==
|
||||
Context.AMDGPUFeaturePredicateTy) {
|
||||
Diag(VDecl->getLocation(),
|
||||
diag::err_amdgcn_predicate_type_is_not_constructible)
|
||||
<< VDecl;
|
||||
VDecl->setInvalidDecl();
|
||||
return;
|
||||
}
|
||||
|
||||
// WebAssembly tables can't be used to initialise a variable.
|
||||
if (!Init->getType().isNull() && Init->getType()->isWebAssemblyTableType()) {
|
||||
Diag(Init->getExprLoc(), diag::err_wasm_table_art) << 0;
|
||||
@ -14462,6 +14473,13 @@ void Sema::ActOnUninitializedDecl(Decl *RealDecl) {
|
||||
if (VarDecl *Var = dyn_cast<VarDecl>(RealDecl)) {
|
||||
QualType Type = Var->getType();
|
||||
|
||||
if (Type.getDesugaredType(Context) == Context.AMDGPUFeaturePredicateTy) {
|
||||
Diag(Var->getLocation(),
|
||||
diag::err_amdgcn_predicate_type_is_not_constructible)
|
||||
<< Var;
|
||||
Var->setInvalidDecl();
|
||||
return;
|
||||
}
|
||||
// C++1z [dcl.dcl]p1 grammar implies that an initializer is mandatory.
|
||||
if (isa<DecompositionDecl>(RealDecl)) {
|
||||
Diag(Var->getLocation(), diag::err_decomp_decl_requires_init) << Var;
|
||||
@ -17021,8 +17039,12 @@ Decl *Sema::ActOnFinishFunctionBody(Decl *dcl, Stmt *Body, bool IsInstantiation,
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if (Body && FSI->HasPotentialAvailabilityViolations)
|
||||
DiagnoseUnguardedAvailabilityViolations(dcl);
|
||||
if (Body) {
|
||||
if (FSI->HasPotentialAvailabilityViolations)
|
||||
DiagnoseUnguardedAvailabilityViolations(dcl);
|
||||
else if (AMDGPU().HasPotentiallyUnguardedBuiltinUsage(FD))
|
||||
AMDGPU().DiagnoseUnguardedBuiltinUsage(FD);
|
||||
}
|
||||
|
||||
assert(!FSI->ObjCShouldCallSuper &&
|
||||
"This should only be set for ObjC methods, which should have been "
|
||||
|
||||
@ -53,6 +53,7 @@
|
||||
#include "clang/Sema/ParsedTemplate.h"
|
||||
#include "clang/Sema/Scope.h"
|
||||
#include "clang/Sema/ScopeInfo.h"
|
||||
#include "clang/Sema/SemaAMDGPU.h"
|
||||
#include "clang/Sema/SemaARM.h"
|
||||
#include "clang/Sema/SemaCUDA.h"
|
||||
#include "clang/Sema/SemaFixItUtils.h"
|
||||
@ -6779,6 +6780,22 @@ ExprResult Sema::BuildCallExpr(Scope *Scope, Expr *Fn, SourceLocation LParenLoc,
|
||||
if (Result.isInvalid()) return ExprError();
|
||||
Fn = Result.get();
|
||||
|
||||
// The __builtin_amdgcn_is_invocable builtin is special, and will be resolved
|
||||
// later, when we check boolean conditions, for now we merely forward it
|
||||
// without any additional checking.
|
||||
if (Fn->getType() == Context.BuiltinFnTy && ArgExprs.size() == 1 &&
|
||||
ArgExprs[0]->getType() == Context.BuiltinFnTy) {
|
||||
const auto *FD = cast<FunctionDecl>(Fn->getReferencedDeclOfCallee());
|
||||
|
||||
if (FD->getName() == "__builtin_amdgcn_is_invocable") {
|
||||
QualType FnPtrTy = Context.getPointerType(FD->getType());
|
||||
Expr *R = ImpCastExprToType(Fn, FnPtrTy, CK_BuiltinFnToFnPtr).get();
|
||||
return CallExpr::Create(
|
||||
Context, R, ArgExprs, Context.AMDGPUFeaturePredicateTy,
|
||||
ExprValueKind::VK_PRValue, RParenLoc, FPOptionsOverride());
|
||||
}
|
||||
}
|
||||
|
||||
if (CheckArgsForPlaceholders(ArgExprs))
|
||||
return ExprError();
|
||||
|
||||
@ -6897,6 +6914,15 @@ ExprResult Sema::BuildCallExpr(Scope *Scope, Expr *Fn, SourceLocation LParenLoc,
|
||||
|
||||
FunctionDecl *FDecl = dyn_cast<FunctionDecl>(NDecl);
|
||||
if (FDecl && FDecl->getBuiltinID()) {
|
||||
const llvm::Triple &Triple = Context.getTargetInfo().getTriple();
|
||||
if (Triple.isSPIRV() && Triple.getVendor() == llvm::Triple::AMD) {
|
||||
if (Context.BuiltinInfo.isTSBuiltin(FDecl->getBuiltinID()) &&
|
||||
!Context.BuiltinInfo.isAuxBuiltinID(FDecl->getBuiltinID())) {
|
||||
AMDGPU().AddPotentiallyUnguardedBuiltinUser(cast<FunctionDecl>(
|
||||
getFunctionLevelDeclContext(/*AllowLambda=*/true)));
|
||||
}
|
||||
}
|
||||
|
||||
// Rewrite the function decl for this builtin by replacing parameters
|
||||
// with no explicit address space with the address space of the arguments
|
||||
// in ArgExprs.
|
||||
@ -13905,6 +13931,11 @@ inline QualType Sema::CheckLogicalOperands(ExprResult &LHS, ExprResult &RHS,
|
||||
if (RHS.isInvalid())
|
||||
return QualType();
|
||||
|
||||
if (LHS.get()->getType() == Context.AMDGPUFeaturePredicateTy)
|
||||
LHS = AMDGPU().ExpandAMDGPUPredicateBuiltIn(LHS.get());
|
||||
if (RHS.get()->getType() == Context.AMDGPUFeaturePredicateTy)
|
||||
RHS = AMDGPU().ExpandAMDGPUPredicateBuiltIn(RHS.get());
|
||||
|
||||
if (!LHS.get()->getType()->isScalarType() ||
|
||||
!RHS.get()->getType()->isScalarType())
|
||||
return InvalidOperands(Loc, LHS, RHS);
|
||||
@ -16315,6 +16346,10 @@ ExprResult Sema::CreateBuiltinUnaryOp(SourceLocation OpLoc,
|
||||
// Vector logical not returns the signed variant of the operand type.
|
||||
resultType = GetSignedVectorType(resultType);
|
||||
break;
|
||||
} else if (resultType == Context.AMDGPUFeaturePredicateTy) {
|
||||
resultType = Context.getLogicalOperationType();
|
||||
Input = AMDGPU().ExpandAMDGPUPredicateBuiltIn(InputExpr);
|
||||
break;
|
||||
} else {
|
||||
return ExprError(Diag(OpLoc, diag::err_typecheck_unary_expr)
|
||||
<< resultType << Input.get()->getSourceRange());
|
||||
@ -21176,6 +21211,9 @@ ExprResult Sema::CheckBooleanCondition(SourceLocation Loc, Expr *E,
|
||||
E = result.get();
|
||||
|
||||
if (!E->isTypeDependent()) {
|
||||
if (E->getType() == Context.AMDGPUFeaturePredicateTy)
|
||||
return AMDGPU().ExpandAMDGPUPredicateBuiltIn(E);
|
||||
|
||||
if (getLangOpts().CPlusPlus)
|
||||
return CheckCXXBooleanCondition(E, IsConstexpr); // C++ 6.4p4
|
||||
|
||||
|
||||
@ -9244,6 +9244,15 @@ bool InitializationSequence::Diagnose(Sema &S,
|
||||
|
||||
case FK_ConversionFailed: {
|
||||
QualType FromType = OnlyArg->getType();
|
||||
// __amdgpu_feature_predicate_t can be explicitly cast to the logical op
|
||||
// type, although this is almost always an error and we advise against it.
|
||||
if (FromType == S.Context.AMDGPUFeaturePredicateTy &&
|
||||
DestType == S.Context.getLogicalOperationType()) {
|
||||
S.Diag(OnlyArg->getExprLoc(),
|
||||
diag::err_amdgcn_predicate_type_needs_explicit_bool_cast)
|
||||
<< OnlyArg << DestType;
|
||||
break;
|
||||
}
|
||||
PartialDiagnostic PDiag = S.PDiag(diag::err_init_conversion_failed)
|
||||
<< (int)Entity.getKind()
|
||||
<< DestType
|
||||
@ -10056,6 +10065,14 @@ Sema::PerformCopyInitialization(const InitializedEntity &Entity,
|
||||
if (EqualLoc.isInvalid())
|
||||
EqualLoc = InitE->getBeginLoc();
|
||||
|
||||
if (Entity.getType().getDesugaredType(Context) ==
|
||||
Context.AMDGPUFeaturePredicateTy &&
|
||||
Entity.getDecl()) {
|
||||
Diag(EqualLoc, diag::err_amdgcn_predicate_type_is_not_constructible)
|
||||
<< Entity.getDecl();
|
||||
return ExprError();
|
||||
}
|
||||
|
||||
InitializationKind Kind = InitializationKind::CreateCopy(
|
||||
InitE->getBeginLoc(), EqualLoc, AllowExplicit);
|
||||
InitializationSequence Seq(*this, Entity, Kind, InitE, TopLevelOfInitList);
|
||||
|
||||
@ -30,6 +30,7 @@
|
||||
#include "clang/Sema/Initialization.h"
|
||||
#include "clang/Sema/Lookup.h"
|
||||
#include "clang/Sema/Overload.h"
|
||||
#include "clang/Sema/SemaAMDGPU.h"
|
||||
#include "clang/Sema/SemaARM.h"
|
||||
#include "clang/Sema/SemaCUDA.h"
|
||||
#include "clang/Sema/SemaObjC.h"
|
||||
@ -6365,12 +6366,13 @@ TryContextuallyConvertToBool(Sema &S, Expr *From) {
|
||||
ExprResult Sema::PerformContextuallyConvertToBool(Expr *From) {
|
||||
if (checkPlaceholderForOverload(*this, From))
|
||||
return ExprError();
|
||||
if (From->getType() == Context.AMDGPUFeaturePredicateTy)
|
||||
return AMDGPU().ExpandAMDGPUPredicateBuiltIn(From);
|
||||
|
||||
ImplicitConversionSequence ICS = TryContextuallyConvertToBool(*this, From);
|
||||
if (!ICS.isBad())
|
||||
return PerformImplicitConversion(From, Context.BoolTy, ICS,
|
||||
AssignmentAction::Converting);
|
||||
|
||||
if (!DiagnoseMultipleUserDefinedConversion(From, Context.BoolTy))
|
||||
return Diag(From->getBeginLoc(), diag::err_typecheck_bool_condition)
|
||||
<< From->getType() << From->getSourceRange();
|
||||
@ -12218,6 +12220,16 @@ static void DiagnoseBadConversion(Sema &S, OverloadCandidate *Cand,
|
||||
if (TakingCandidateAddress && !checkAddressOfCandidateIsAvailable(S, Fn))
|
||||
return;
|
||||
|
||||
// __amdgpu_feature_predicate_t can be explicitly cast to the logical op type,
|
||||
// although this is almost always an error and we advise against it.
|
||||
if (FromTy == S.Context.AMDGPUFeaturePredicateTy &&
|
||||
ToTy == S.Context.getLogicalOperationType()) {
|
||||
S.Diag(Conv.Bad.FromExpr->getExprLoc(),
|
||||
diag::err_amdgcn_predicate_type_needs_explicit_bool_cast)
|
||||
<< Conv.Bad.FromExpr << ToTy;
|
||||
return;
|
||||
}
|
||||
|
||||
// Emit the generic diagnostic and, optionally, add the hints to it.
|
||||
PartialDiagnostic FDiag = S.PDiag(diag::note_ovl_candidate_bad_conv);
|
||||
FDiag << (unsigned)FnKindPair.first << (unsigned)FnKindPair.second << FnDesc
|
||||
|
||||
72
clang/test/CodeGen/amdgpu-builtin-is-invocable.c
Normal file
72
clang/test/CodeGen/amdgpu-builtin-is-invocable.c
Normal file
@ -0,0 +1,72 @@
|
||||
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --version 6
|
||||
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx900 -emit-llvm %s -o - | FileCheck --check-prefix=AMDGCN-GFX900 %s
|
||||
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx1010 -emit-llvm %s -o - | FileCheck --check-prefix=AMDGCN-GFX1010 %s
|
||||
// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm %s -o - | FileCheck --check-prefix=AMDGCNSPIRV %s
|
||||
|
||||
// Test that, depending on triple and, if applicable, target-cpu, one of three
|
||||
// things happens:
|
||||
// 1) for gfx900 we emit an empty kernel (concrete target, lacks feature)
|
||||
// 2) for gfx1010 we emit a call to trap (concrete target, has feature)
|
||||
// 3) for AMDGCNSPIRV we emit a boolean specialisation constant, via a call
|
||||
// to __spirv_SpecConstant, with the id of UINT32_MAX, and the boolean
|
||||
// value of false, which will yield an OpSpecConstantFalse in SPIR-V
|
||||
|
||||
// AMDGCN-GFX900-LABEL: define dso_local void @foo(
|
||||
// AMDGCN-GFX900-SAME: ) #[[ATTR0:[0-9]+]] {
|
||||
// AMDGCN-GFX900-NEXT: [[ENTRY:.*:]]
|
||||
// AMDGCN-GFX900-NEXT: ret void
|
||||
//
|
||||
// AMDGCN-GFX1010-LABEL: define dso_local void @foo(
|
||||
// AMDGCN-GFX1010-SAME: ) #[[ATTR0:[0-9]+]] {
|
||||
// AMDGCN-GFX1010-NEXT: [[ENTRY:.*:]]
|
||||
// AMDGCN-GFX1010-NEXT: call void @llvm.trap()
|
||||
// AMDGCN-GFX1010-NEXT: ret void
|
||||
//
|
||||
// AMDGCNSPIRV-LABEL: define spir_func void @foo(
|
||||
// AMDGCNSPIRV-SAME: ) addrspace(4) #[[ATTR0:[0-9]+]] {
|
||||
// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
|
||||
// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata [[META2:![0-9]+]])
|
||||
// AMDGCNSPIRV-NEXT: [[TOBOOL:%.*]] = icmp ne i1 [[TMP0]], false
|
||||
// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL]], label %[[IF_THEN:.*]], label %[[LOR_LHS_FALSE:.*]]
|
||||
// AMDGCNSPIRV: [[LOR_LHS_FALSE]]:
|
||||
// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata [[META3:![0-9]+]])
|
||||
// AMDGCNSPIRV-NEXT: [[TOBOOL1:%.*]] = icmp ne i1 [[TMP1]], false
|
||||
// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL1]], label %[[IF_THEN]], label %[[LOR_LHS_FALSE2:.*]]
|
||||
// AMDGCNSPIRV: [[LOR_LHS_FALSE2]]:
|
||||
// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata [[META4:![0-9]+]])
|
||||
// AMDGCNSPIRV-NEXT: [[TOBOOL3:%.*]] = icmp ne i1 [[TMP2]], false
|
||||
// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL3]], label %[[IF_THEN]], label %[[IF_END:.*]]
|
||||
// AMDGCNSPIRV: [[IF_THEN]]:
|
||||
// AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.trap()
|
||||
// AMDGCNSPIRV-NEXT: br label %[[IF_END]]
|
||||
// AMDGCNSPIRV: [[IF_END]]:
|
||||
// AMDGCNSPIRV-NEXT: ret void
|
||||
//
|
||||
void foo() {
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_permlanex16) ||
|
||||
(__builtin_amdgcn_is_invocable(__builtin_amdgcn_permlanex16_var)) ||
|
||||
(__builtin_amdgcn_is_invocable(__builtin_amdgcn_ashr_pk_i8_i32)))
|
||||
return __builtin_trap();
|
||||
}
|
||||
//.
|
||||
// AMDGCN-GFX900: attributes #[[ATTR0]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" }
|
||||
//.
|
||||
// AMDGCN-GFX1010: attributes #[[ATTR0]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1010" }
|
||||
// AMDGCN-GFX1010: attributes #[[ATTR1:[0-9]+]] = { cold noreturn nounwind memory(inaccessiblemem: write) }
|
||||
//.
|
||||
// AMDGCNSPIRV: attributes #[[ATTR0]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-trans-insts,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+gws,+image-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+vmem-to-lds-load-insts,+wavefrontsize32,+wavefrontsize64" }
|
||||
// AMDGCNSPIRV: attributes #[[ATTR1:[0-9]+]] = { nounwind }
|
||||
// AMDGCNSPIRV: attributes #[[ATTR2:[0-9]+]] = { cold noreturn nounwind memory(inaccessiblemem: write) }
|
||||
//.
|
||||
// AMDGCN-GFX900: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
|
||||
// AMDGCN-GFX900: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
|
||||
//.
|
||||
// AMDGCN-GFX1010: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
|
||||
// AMDGCN-GFX1010: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
|
||||
//.
|
||||
// AMDGCNSPIRV: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
|
||||
// AMDGCNSPIRV: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
|
||||
// AMDGCNSPIRV: [[META2]] = !{!"has.gfx10-insts"}
|
||||
// AMDGCNSPIRV: [[META3]] = !{!"has.gfx12-insts"}
|
||||
// AMDGCNSPIRV: [[META4]] = !{!"has.ashr-pk-insts"}
|
||||
//.
|
||||
83
clang/test/CodeGen/amdgpu-builtin-processor-is.c
Normal file
83
clang/test/CodeGen/amdgpu-builtin-processor-is.c
Normal file
@ -0,0 +1,83 @@
|
||||
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --version 5
|
||||
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx900 -emit-llvm %s -o - | FileCheck --check-prefix=AMDGCN-GFX900 %s
|
||||
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx1010 -emit-llvm %s -o - | FileCheck --check-prefix=AMDGCN-GFX1010 %s
|
||||
// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm %s -o - | FileCheck --check-prefix=AMDGCNSPIRV %s
|
||||
|
||||
// Test that, depending on triple and, if applicable, target-cpu, one of three
|
||||
// things happens:
|
||||
// 1) for gfx900 we emit a call to trap (concrete target, matches)
|
||||
// 2) for gfx1010 we emit an empty kernel (concrete target, does not match)
|
||||
// 3) for AMDGCNSPIRV we emit a boolean specialisation constant, via a call
|
||||
// to __spirv_SpecConstant, with the id of UINT32_MAX, and the boolean
|
||||
// value of false, which will yield an OpSpecConstantFalse in SPIR-V
|
||||
// AMDGCN-GFX900-LABEL: define dso_local void @foo(
|
||||
// AMDGCN-GFX900-SAME: ) #[[ATTR0:[0-9]+]] {
|
||||
// AMDGCN-GFX900-NEXT: [[ENTRY:.*:]]
|
||||
// AMDGCN-GFX900-NEXT: call void @llvm.trap()
|
||||
// AMDGCN-GFX900-NEXT: ret void
|
||||
//
|
||||
// AMDGCN-GFX1010-LABEL: define dso_local void @foo(
|
||||
// AMDGCN-GFX1010-SAME: ) #[[ATTR0:[0-9]+]] {
|
||||
// AMDGCN-GFX1010-NEXT: [[ENTRY:.*:]]
|
||||
// AMDGCN-GFX1010-NEXT: ret void
|
||||
//
|
||||
// AMDGCNSPIRV-LABEL: define spir_func void @foo(
|
||||
// AMDGCNSPIRV-SAME: ) addrspace(4) #[[ATTR0:[0-9]+]] {
|
||||
// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
|
||||
// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata [[META2:![0-9]+]])
|
||||
// AMDGCNSPIRV-NEXT: [[TOBOOL:%.*]] = icmp ne i1 [[TMP0]], false
|
||||
// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL]], label %[[IF_THEN:.*]], label %[[LOR_LHS_FALSE:.*]]
|
||||
// AMDGCNSPIRV: [[LOR_LHS_FALSE]]:
|
||||
// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata [[META3:![0-9]+]])
|
||||
// AMDGCNSPIRV-NEXT: [[TOBOOL1:%.*]] = icmp ne i1 [[TMP1]], false
|
||||
// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL1]], label %[[IF_THEN]], label %[[LOR_LHS_FALSE2:.*]]
|
||||
// AMDGCNSPIRV: [[LOR_LHS_FALSE2]]:
|
||||
// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata [[META4:![0-9]+]])
|
||||
// AMDGCNSPIRV-NEXT: [[TOBOOL3:%.*]] = icmp ne i1 [[TMP2]], false
|
||||
// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL3]], label %[[IF_THEN]], label %[[LOR_LHS_FALSE4:.*]]
|
||||
// AMDGCNSPIRV: [[LOR_LHS_FALSE4]]:
|
||||
// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata [[META5:![0-9]+]])
|
||||
// AMDGCNSPIRV-NEXT: [[TOBOOL5:%.*]] = icmp ne i1 [[TMP3]], false
|
||||
// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL5]], label %[[IF_THEN]], label %[[LOR_LHS_FALSE6:.*]]
|
||||
// AMDGCNSPIRV: [[LOR_LHS_FALSE6]]:
|
||||
// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata [[META6:![0-9]+]])
|
||||
// AMDGCNSPIRV-NEXT: [[TOBOOL7:%.*]] = icmp ne i1 [[TMP4]], false
|
||||
// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL7]], label %[[IF_THEN]], label %[[IF_END:.*]]
|
||||
// AMDGCNSPIRV: [[IF_THEN]]:
|
||||
// AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.trap()
|
||||
// AMDGCNSPIRV-NEXT: br label %[[IF_END]]
|
||||
// AMDGCNSPIRV: [[IF_END]]:
|
||||
// AMDGCNSPIRV-NEXT: ret void
|
||||
//
|
||||
void foo() {
|
||||
if (__builtin_amdgcn_processor_is("gfx900") ||
|
||||
__builtin_amdgcn_processor_is("gfx906") ||
|
||||
__builtin_amdgcn_processor_is("gfx90c") ||
|
||||
(__builtin_amdgcn_processor_is("gfx90a")) ||
|
||||
(__builtin_amdgcn_processor_is("gfx942")))
|
||||
return __builtin_trap();
|
||||
}
|
||||
//.
|
||||
// AMDGCN-GFX900: attributes #[[ATTR0]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" }
|
||||
// AMDGCN-GFX900: attributes #[[ATTR1:[0-9]+]] = { cold noreturn nounwind memory(inaccessiblemem: write) }
|
||||
//.
|
||||
// AMDGCN-GFX1010: attributes #[[ATTR0]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1010" }
|
||||
//.
|
||||
// AMDGCNSPIRV: attributes #[[ATTR0]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-trans-insts,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+gws,+image-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+vmem-to-lds-load-insts,+wavefrontsize32,+wavefrontsize64" }
|
||||
// AMDGCNSPIRV: attributes #[[ATTR1:[0-9]+]] = { nounwind }
|
||||
// AMDGCNSPIRV: attributes #[[ATTR2:[0-9]+]] = { cold noreturn nounwind memory(inaccessiblemem: write) }
|
||||
//.
|
||||
// AMDGCN-GFX900: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
|
||||
// AMDGCN-GFX900: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
|
||||
//.
|
||||
// AMDGCN-GFX1010: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
|
||||
// AMDGCN-GFX1010: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
|
||||
//.
|
||||
// AMDGCNSPIRV: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
|
||||
// AMDGCNSPIRV: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
|
||||
// AMDGCNSPIRV: [[META2]] = !{!"is.gfx900"}
|
||||
// AMDGCNSPIRV: [[META3]] = !{!"is.gfx906"}
|
||||
// AMDGCNSPIRV: [[META4]] = !{!"is.gfx90c"}
|
||||
// AMDGCNSPIRV: [[META5]] = !{!"is.gfx90a"}
|
||||
// AMDGCNSPIRV: [[META6]] = !{!"is.gfx942"}
|
||||
//.
|
||||
48
clang/test/CodeGen/amdgpu-feature-builtins-invalid-use.cpp
Normal file
48
clang/test/CodeGen/amdgpu-feature-builtins-invalid-use.cpp
Normal file
@ -0,0 +1,48 @@
|
||||
// RUN: not %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx900 -emit-llvm %s -o - 2>&1 | FileCheck %s
|
||||
// RUN: not %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm %s -o - 2>&1 | FileCheck %s
|
||||
|
||||
bool predicate(bool x);
|
||||
void pass_by_value(__amdgpu_feature_predicate_t x);
|
||||
|
||||
void invalid_uses(int *p, int x, const __amdgpu_feature_predicate_t &lv,
|
||||
__amdgpu_feature_predicate_t &&rv) {
|
||||
// CHECK: error: 'a' has type __amdgpu_feature_predicate_t, which is not constructible
|
||||
__amdgpu_feature_predicate_t a;
|
||||
// CHECK: error: 'b' has type __amdgpu_feature_predicate_t, which is not constructible
|
||||
__amdgpu_feature_predicate_t b = __builtin_amdgcn_processor_is("gfx906");
|
||||
// CHECK: error: 'c' has type __amdgpu_feature_predicate_t, which is not constructible
|
||||
__amdgpu_feature_predicate_t c = lv;
|
||||
// CHECK: error: 'd' has type __amdgpu_feature_predicate_t, which is not constructible
|
||||
__amdgpu_feature_predicate_t d = rv;
|
||||
// CHECK: error: '__builtin_amdgcn_processor_is("gfx906")' must be explicitly cast to 'bool'; however, please note that this is almost always an error and that it prevents the effective guarding of target dependent code, and thus should be avoided
|
||||
bool invalid_use_in_init_0 = __builtin_amdgcn_processor_is("gfx906");
|
||||
// CHECK: error: 'x' has type __amdgpu_feature_predicate_t, which is not constructible
|
||||
pass_by_value(__builtin_amdgcn_processor_is("gfx906"));
|
||||
// CHECK: error: '__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_sleep_var)' must be explicitly cast to 'bool'; however, please note that this is almost always an error and that it prevents the effective guarding of target dependent code, and thus should be avoided
|
||||
bool invalid_use_in_init_1 = __builtin_amdgcn_is_invocable(__builtin_amdgcn_s_sleep_var);
|
||||
// CHECK: error: '__builtin_amdgcn_processor_is("gfx906")' must be explicitly cast to 'bool'; however, please note that this is almost always an error and that it prevents the effective guarding of target dependent code, and thus should be avoided
|
||||
if (bool invalid_use_in_init_2 = __builtin_amdgcn_processor_is("gfx906")) return;
|
||||
// CHECK: error: '__builtin_amdgcn_processor_is("gfx1200")' must be explicitly cast to 'bool'; however, please note that this is almost always an error and that it prevents the effective guarding of target dependent code, and thus should be avoided
|
||||
if (predicate(__builtin_amdgcn_processor_is("gfx1200"))) __builtin_amdgcn_s_sleep_var(x);
|
||||
}
|
||||
|
||||
void invalid_invocations(int x, const char* str) {
|
||||
// CHECK: error: the argument to __builtin_amdgcn_processor_is must be a valid AMDGCN processor identifier; 'not_an_amdgcn_gfx_id' is not valid
|
||||
// CHECK-DAG: note: valid AMDGCN processor identifiers are: {{.*}}gfx{{.*}}
|
||||
if (__builtin_amdgcn_processor_is("not_an_amdgcn_gfx_id")) return;
|
||||
// CHECK: error: the argument to __builtin_amdgcn_processor_is must be a string literal
|
||||
if (__builtin_amdgcn_processor_is(str)) return;
|
||||
// CHECK: error: the argument to __builtin_amdgcn_is_invocable must be either a target agnostic builtin or an AMDGCN target specific builtin; {{.*}}__builtin_amdgcn_s_sleep_var{{.*}} is not valid
|
||||
if (__builtin_amdgcn_is_invocable("__builtin_amdgcn_s_sleep_var")) return;
|
||||
// CHECK: error: the argument to __builtin_amdgcn_is_invocable must be either a target agnostic builtin or an AMDGCN target specific builtin; {{.*}}str{{.*}} is not valid
|
||||
else if (__builtin_amdgcn_is_invocable(str)) return;
|
||||
// CHECK: error: the argument to __builtin_amdgcn_is_invocable must be either a target agnostic builtin or an AMDGCN target specific builtin; {{.*}}x{{.*}} is not valid
|
||||
else if (__builtin_amdgcn_is_invocable(x)) return;
|
||||
// CHECK: error: use of undeclared identifier '__builtin_ia32_pause'
|
||||
else if (__builtin_amdgcn_is_invocable(__builtin_ia32_pause)) return;
|
||||
}
|
||||
|
||||
bool return_needs_cast() {
|
||||
// CHECK: error: '__builtin_amdgcn_processor_is("gfx900")' must be explicitly cast to 'bool'; however, please note that this is almost always an error and that it prevents the effective guarding of target dependent code, and thus should be avoided
|
||||
return __builtin_amdgcn_processor_is("gfx900");
|
||||
}
|
||||
@ -5,7 +5,7 @@
|
||||
|
||||
// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -x hip \
|
||||
// RUN: -aux-triple x86_64-pc-windows-msvc -fcuda-is-device -emit-llvm %s \
|
||||
// RUN: -o - | FileCheck %s
|
||||
// RUN: -o - | FileCheck %s --check-prefix=AMDGCNSPIRV
|
||||
|
||||
#include "Inputs/cuda.h"
|
||||
|
||||
@ -28,6 +28,25 @@
|
||||
// CHECK-NEXT: store i32 [[TMP2]], ptr addrspace(4) [[TMP3]], align 4
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
// AMDGCNSPIRV-LABEL: @_Z16use_dispatch_ptrPi(
|
||||
// AMDGCNSPIRV-NEXT: entry:
|
||||
// AMDGCNSPIRV-NEXT: [[OUT:%.*]] = alloca ptr addrspace(4), align 8
|
||||
// AMDGCNSPIRV-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(4), align 8
|
||||
// AMDGCNSPIRV-NEXT: [[DISPATCH_PTR:%.*]] = alloca ptr addrspace(4), align 8
|
||||
// AMDGCNSPIRV-NEXT: [[OUT_ASCAST:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(4)
|
||||
// AMDGCNSPIRV-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr [[OUT_ADDR]] to ptr addrspace(4)
|
||||
// AMDGCNSPIRV-NEXT: [[DISPATCH_PTR_ASCAST:%.*]] = addrspacecast ptr [[DISPATCH_PTR]] to ptr addrspace(4)
|
||||
// AMDGCNSPIRV-NEXT: store ptr addrspace(1) [[OUT_COERCE:%.*]], ptr addrspace(4) [[OUT_ASCAST]], align 8
|
||||
// AMDGCNSPIRV-NEXT: [[OUT1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[OUT_ASCAST]], align 8
|
||||
// AMDGCNSPIRV-NEXT: store ptr addrspace(4) [[OUT1]], ptr addrspace(4) [[OUT_ADDR_ASCAST]], align 8
|
||||
// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = call addrspace(4) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// AMDGCNSPIRV-NEXT: store ptr addrspace(4) [[TMP0]], ptr addrspace(4) [[DISPATCH_PTR_ASCAST]], align 8
|
||||
// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[DISPATCH_PTR_ASCAST]], align 8
|
||||
// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4
|
||||
// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[OUT_ADDR_ASCAST]], align 8
|
||||
// AMDGCNSPIRV-NEXT: store i32 [[TMP2]], ptr addrspace(4) [[TMP3]], align 4
|
||||
// AMDGCNSPIRV-NEXT: ret void
|
||||
//
|
||||
__global__ void use_dispatch_ptr(int* out) {
|
||||
const int* dispatch_ptr = (const int*)__builtin_amdgcn_dispatch_ptr();
|
||||
*out = *dispatch_ptr;
|
||||
@ -52,6 +71,25 @@ __global__ void use_dispatch_ptr(int* out) {
|
||||
// CHECK-NEXT: store i32 [[TMP2]], ptr addrspace(4) [[TMP3]], align 4
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
// AMDGCNSPIRV-LABEL: @_Z13use_queue_ptrPi(
|
||||
// AMDGCNSPIRV-NEXT: entry:
|
||||
// AMDGCNSPIRV-NEXT: [[OUT:%.*]] = alloca ptr addrspace(4), align 8
|
||||
// AMDGCNSPIRV-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(4), align 8
|
||||
// AMDGCNSPIRV-NEXT: [[QUEUE_PTR:%.*]] = alloca ptr addrspace(4), align 8
|
||||
// AMDGCNSPIRV-NEXT: [[OUT_ASCAST:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(4)
|
||||
// AMDGCNSPIRV-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr [[OUT_ADDR]] to ptr addrspace(4)
|
||||
// AMDGCNSPIRV-NEXT: [[QUEUE_PTR_ASCAST:%.*]] = addrspacecast ptr [[QUEUE_PTR]] to ptr addrspace(4)
|
||||
// AMDGCNSPIRV-NEXT: store ptr addrspace(1) [[OUT_COERCE:%.*]], ptr addrspace(4) [[OUT_ASCAST]], align 8
|
||||
// AMDGCNSPIRV-NEXT: [[OUT1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[OUT_ASCAST]], align 8
|
||||
// AMDGCNSPIRV-NEXT: store ptr addrspace(4) [[OUT1]], ptr addrspace(4) [[OUT_ADDR_ASCAST]], align 8
|
||||
// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = call addrspace(4) ptr addrspace(4) @llvm.amdgcn.queue.ptr()
|
||||
// AMDGCNSPIRV-NEXT: store ptr addrspace(4) [[TMP0]], ptr addrspace(4) [[QUEUE_PTR_ASCAST]], align 8
|
||||
// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[QUEUE_PTR_ASCAST]], align 8
|
||||
// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4
|
||||
// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[OUT_ADDR_ASCAST]], align 8
|
||||
// AMDGCNSPIRV-NEXT: store i32 [[TMP2]], ptr addrspace(4) [[TMP3]], align 4
|
||||
// AMDGCNSPIRV-NEXT: ret void
|
||||
//
|
||||
__global__ void use_queue_ptr(int* out) {
|
||||
const int* queue_ptr = (const int*)__builtin_amdgcn_queue_ptr();
|
||||
*out = *queue_ptr;
|
||||
@ -76,14 +114,30 @@ __global__ void use_queue_ptr(int* out) {
|
||||
// CHECK-NEXT: store i32 [[TMP2]], ptr addrspace(4) [[TMP3]], align 4
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
// AMDGCNSPIRV-LABEL: @_Z19use_implicitarg_ptrPi(
|
||||
// AMDGCNSPIRV-NEXT: entry:
|
||||
// AMDGCNSPIRV-NEXT: [[OUT:%.*]] = alloca ptr addrspace(4), align 8
|
||||
// AMDGCNSPIRV-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(4), align 8
|
||||
// AMDGCNSPIRV-NEXT: [[IMPLICITARG_PTR:%.*]] = alloca ptr addrspace(4), align 8
|
||||
// AMDGCNSPIRV-NEXT: [[OUT_ASCAST:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(4)
|
||||
// AMDGCNSPIRV-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr [[OUT_ADDR]] to ptr addrspace(4)
|
||||
// AMDGCNSPIRV-NEXT: [[IMPLICITARG_PTR_ASCAST:%.*]] = addrspacecast ptr [[IMPLICITARG_PTR]] to ptr addrspace(4)
|
||||
// AMDGCNSPIRV-NEXT: store ptr addrspace(1) [[OUT_COERCE:%.*]], ptr addrspace(4) [[OUT_ASCAST]], align 8
|
||||
// AMDGCNSPIRV-NEXT: [[OUT1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[OUT_ASCAST]], align 8
|
||||
// AMDGCNSPIRV-NEXT: store ptr addrspace(4) [[OUT1]], ptr addrspace(4) [[OUT_ADDR_ASCAST]], align 8
|
||||
// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = call addrspace(4) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
|
||||
// AMDGCNSPIRV-NEXT: store ptr addrspace(4) [[TMP0]], ptr addrspace(4) [[IMPLICITARG_PTR_ASCAST]], align 8
|
||||
// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[IMPLICITARG_PTR_ASCAST]], align 8
|
||||
// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4
|
||||
// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[OUT_ADDR_ASCAST]], align 8
|
||||
// AMDGCNSPIRV-NEXT: store i32 [[TMP2]], ptr addrspace(4) [[TMP3]], align 4
|
||||
// AMDGCNSPIRV-NEXT: ret void
|
||||
//
|
||||
__global__ void use_implicitarg_ptr(int* out) {
|
||||
const int* implicitarg_ptr = (const int*)__builtin_amdgcn_implicitarg_ptr();
|
||||
*out = *implicitarg_ptr;
|
||||
}
|
||||
|
||||
__global__
|
||||
//
|
||||
void
|
||||
// CHECK-LABEL: @_Z12test_ds_fmaxf(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[SRC_ADDR:%.*]] = alloca float, align 4
|
||||
@ -96,7 +150,21 @@ __global__
|
||||
// CHECK-NEXT: store volatile float [[TMP1]], ptr addrspace(4) [[X_ASCAST]], align 4
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
test_ds_fmax(float src) {
|
||||
// AMDGCNSPIRV-LABEL: @_Z12test_ds_fmaxf(
|
||||
// AMDGCNSPIRV-NEXT: entry:
|
||||
// AMDGCNSPIRV-NEXT: [[SRC_ADDR:%.*]] = alloca float, align 4
|
||||
// AMDGCNSPIRV-NEXT: [[X:%.*]] = alloca float, align 4
|
||||
// AMDGCNSPIRV-NEXT: [[SRC_ADDR_ASCAST:%.*]] = addrspacecast ptr [[SRC_ADDR]] to ptr addrspace(4)
|
||||
// AMDGCNSPIRV-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr [[X]] to ptr addrspace(4)
|
||||
// AMDGCNSPIRV-NEXT: store float [[SRC:%.*]], ptr addrspace(4) [[SRC_ADDR_ASCAST]], align 4
|
||||
// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(4) [[SRC_ADDR_ASCAST]], align 4
|
||||
// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw fmax ptr addrspace(3) @_ZZ12test_ds_fmaxfE6shared, float [[TMP0]] monotonic, align 4
|
||||
// AMDGCNSPIRV-NEXT: store volatile float [[TMP1]], ptr addrspace(4) [[X_ASCAST]], align 4
|
||||
// AMDGCNSPIRV-NEXT: ret void
|
||||
//
|
||||
__global__ void test_ds_fmax(float src) {
|
||||
//
|
||||
//
|
||||
__shared__ float shared;
|
||||
volatile float x = __builtin_amdgcn_ds_fmaxf(&shared, src, 0, 0, false);
|
||||
}
|
||||
@ -113,6 +181,18 @@ __global__
|
||||
// CHECK-NEXT: store volatile float [[TMP1]], ptr addrspace(4) [[X_ASCAST]], align 4
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
// AMDGCNSPIRV-LABEL: @_Z12test_ds_faddf(
|
||||
// AMDGCNSPIRV-NEXT: entry:
|
||||
// AMDGCNSPIRV-NEXT: [[SRC_ADDR:%.*]] = alloca float, align 4
|
||||
// AMDGCNSPIRV-NEXT: [[X:%.*]] = alloca float, align 4
|
||||
// AMDGCNSPIRV-NEXT: [[SRC_ADDR_ASCAST:%.*]] = addrspacecast ptr [[SRC_ADDR]] to ptr addrspace(4)
|
||||
// AMDGCNSPIRV-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr [[X]] to ptr addrspace(4)
|
||||
// AMDGCNSPIRV-NEXT: store float [[SRC:%.*]], ptr addrspace(4) [[SRC_ADDR_ASCAST]], align 4
|
||||
// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(4) [[SRC_ADDR_ASCAST]], align 4
|
||||
// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw fadd ptr addrspace(3) @_ZZ12test_ds_faddfE6shared, float [[TMP0]] monotonic, align 4
|
||||
// AMDGCNSPIRV-NEXT: store volatile float [[TMP1]], ptr addrspace(4) [[X_ASCAST]], align 4
|
||||
// AMDGCNSPIRV-NEXT: ret void
|
||||
//
|
||||
__global__ void test_ds_fadd(float src) {
|
||||
__shared__ float shared;
|
||||
volatile float x = __builtin_amdgcn_ds_faddf(&shared, src, 0, 0, false);
|
||||
@ -139,6 +219,27 @@ __global__ void test_ds_fadd(float src) {
|
||||
// CHECK-NEXT: store volatile float [[TMP3]], ptr addrspace(4) [[X_ASCAST]], align 4
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
// AMDGCNSPIRV-LABEL: @_Z12test_ds_fminfPf(
|
||||
// AMDGCNSPIRV-NEXT: entry:
|
||||
// AMDGCNSPIRV-NEXT: [[SHARED:%.*]] = alloca ptr addrspace(4), align 8
|
||||
// AMDGCNSPIRV-NEXT: [[SRC_ADDR:%.*]] = alloca float, align 4
|
||||
// AMDGCNSPIRV-NEXT: [[SHARED_ADDR:%.*]] = alloca ptr addrspace(4), align 8
|
||||
// AMDGCNSPIRV-NEXT: [[X:%.*]] = alloca float, align 4
|
||||
// AMDGCNSPIRV-NEXT: [[SHARED_ASCAST:%.*]] = addrspacecast ptr [[SHARED]] to ptr addrspace(4)
|
||||
// AMDGCNSPIRV-NEXT: [[SRC_ADDR_ASCAST:%.*]] = addrspacecast ptr [[SRC_ADDR]] to ptr addrspace(4)
|
||||
// AMDGCNSPIRV-NEXT: [[SHARED_ADDR_ASCAST:%.*]] = addrspacecast ptr [[SHARED_ADDR]] to ptr addrspace(4)
|
||||
// AMDGCNSPIRV-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr [[X]] to ptr addrspace(4)
|
||||
// AMDGCNSPIRV-NEXT: store ptr addrspace(1) [[SHARED_COERCE:%.*]], ptr addrspace(4) [[SHARED_ASCAST]], align 8
|
||||
// AMDGCNSPIRV-NEXT: [[SHARED1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[SHARED_ASCAST]], align 8
|
||||
// AMDGCNSPIRV-NEXT: store float [[SRC:%.*]], ptr addrspace(4) [[SRC_ADDR_ASCAST]], align 4
|
||||
// AMDGCNSPIRV-NEXT: store ptr addrspace(4) [[SHARED1]], ptr addrspace(4) [[SHARED_ADDR_ASCAST]], align 8
|
||||
// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[SHARED_ADDR_ASCAST]], align 8
|
||||
// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[TMP0]] to ptr addrspace(3)
|
||||
// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(4) [[SRC_ADDR_ASCAST]], align 4
|
||||
// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw fmin ptr addrspace(3) [[TMP1]], float [[TMP2]] monotonic, align 4
|
||||
// AMDGCNSPIRV-NEXT: store volatile float [[TMP3]], ptr addrspace(4) [[X_ASCAST]], align 4
|
||||
// AMDGCNSPIRV-NEXT: ret void
|
||||
//
|
||||
__global__ void test_ds_fmin(float src, float *shared) {
|
||||
volatile float x = __builtin_amdgcn_ds_fminf(shared, src, 0, 0, false);
|
||||
}
|
||||
@ -155,6 +256,11 @@ __device__ void test_ret_builtin_nondef_addrspace() {
|
||||
// CHECK-NEXT: call addrspace(4) void @llvm.amdgcn.endpgm()
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
// AMDGCNSPIRV-LABEL: @_Z6endpgmv(
|
||||
// AMDGCNSPIRV-NEXT: entry:
|
||||
// AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.amdgcn.endpgm()
|
||||
// AMDGCNSPIRV-NEXT: ret void
|
||||
//
|
||||
__global__ void endpgm() {
|
||||
__builtin_amdgcn_endpgm();
|
||||
}
|
||||
@ -183,6 +289,28 @@ __global__ void endpgm() {
|
||||
// CHECK-NEXT: store i64 [[TMP2]], ptr addrspace(4) [[TMP3]], align 8
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
// AMDGCNSPIRV-LABEL: @_Z14test_uicmp_i64Pyyy(
|
||||
// AMDGCNSPIRV-NEXT: entry:
|
||||
// AMDGCNSPIRV-NEXT: [[OUT:%.*]] = alloca ptr addrspace(4), align 8
|
||||
// AMDGCNSPIRV-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(4), align 8
|
||||
// AMDGCNSPIRV-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
|
||||
// AMDGCNSPIRV-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8
|
||||
// AMDGCNSPIRV-NEXT: [[OUT_ASCAST:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(4)
|
||||
// AMDGCNSPIRV-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr [[OUT_ADDR]] to ptr addrspace(4)
|
||||
// AMDGCNSPIRV-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(4)
|
||||
// AMDGCNSPIRV-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr [[B_ADDR]] to ptr addrspace(4)
|
||||
// AMDGCNSPIRV-NEXT: store ptr addrspace(1) [[OUT_COERCE:%.*]], ptr addrspace(4) [[OUT_ASCAST]], align 8
|
||||
// AMDGCNSPIRV-NEXT: [[OUT1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[OUT_ASCAST]], align 8
|
||||
// AMDGCNSPIRV-NEXT: store ptr addrspace(4) [[OUT1]], ptr addrspace(4) [[OUT_ADDR_ASCAST]], align 8
|
||||
// AMDGCNSPIRV-NEXT: store i64 [[A:%.*]], ptr addrspace(4) [[A_ADDR_ASCAST]], align 8
|
||||
// AMDGCNSPIRV-NEXT: store i64 [[B:%.*]], ptr addrspace(4) [[B_ADDR_ASCAST]], align 8
|
||||
// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) [[A_ADDR_ASCAST]], align 8
|
||||
// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(4) [[B_ADDR_ASCAST]], align 8
|
||||
// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = call addrspace(4) i64 @llvm.amdgcn.icmp.i64.i64(i64 [[TMP0]], i64 [[TMP1]], i32 35)
|
||||
// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[OUT_ADDR_ASCAST]], align 8
|
||||
// AMDGCNSPIRV-NEXT: store i64 [[TMP2]], ptr addrspace(4) [[TMP3]], align 8
|
||||
// AMDGCNSPIRV-NEXT: ret void
|
||||
//
|
||||
__global__ void test_uicmp_i64(unsigned long long *out, unsigned long long a, unsigned long long b)
|
||||
{
|
||||
*out = __builtin_amdgcn_uicmpl(a, b, 30+5);
|
||||
@ -199,14 +327,39 @@ __global__ void test_uicmp_i64(unsigned long long *out, unsigned long long a, un
|
||||
// CHECK-NEXT: store ptr addrspace(1) [[OUT_COERCE:%.*]], ptr addrspace(4) [[OUT_ASCAST]], align 8
|
||||
// CHECK-NEXT: [[OUT1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[OUT_ASCAST]], align 8
|
||||
// CHECK-NEXT: store ptr addrspace(4) [[OUT1]], ptr addrspace(4) [[OUT_ADDR_ASCAST]], align 8
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = call addrspace(4) i64 @llvm.amdgcn.s.memtime()
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[OUT_ADDR_ASCAST]], align 8
|
||||
// CHECK-NEXT: store i64 [[TMP0]], ptr addrspace(4) [[TMP1]], align 8
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata [[META5:![0-9]+]])
|
||||
// CHECK-NEXT: br i1 [[TMP0]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
|
||||
// CHECK: if.then:
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = call addrspace(4) i64 @llvm.amdgcn.s.memtime()
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[OUT_ADDR_ASCAST]], align 8
|
||||
// CHECK-NEXT: store i64 [[TMP1]], ptr addrspace(4) [[TMP2]], align 8
|
||||
// CHECK-NEXT: br label [[IF_END]]
|
||||
// CHECK: if.end:
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
// AMDGCNSPIRV-LABEL: @_Z14test_s_memtimePy(
|
||||
// AMDGCNSPIRV-NEXT: entry:
|
||||
// AMDGCNSPIRV-NEXT: [[OUT:%.*]] = alloca ptr addrspace(4), align 8
|
||||
// AMDGCNSPIRV-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(4), align 8
|
||||
// AMDGCNSPIRV-NEXT: [[OUT_ASCAST:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(4)
|
||||
// AMDGCNSPIRV-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr [[OUT_ADDR]] to ptr addrspace(4)
|
||||
// AMDGCNSPIRV-NEXT: store ptr addrspace(1) [[OUT_COERCE:%.*]], ptr addrspace(4) [[OUT_ASCAST]], align 8
|
||||
// AMDGCNSPIRV-NEXT: [[OUT1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[OUT_ASCAST]], align 8
|
||||
// AMDGCNSPIRV-NEXT: store ptr addrspace(4) [[OUT1]], ptr addrspace(4) [[OUT_ADDR_ASCAST]], align 8
|
||||
// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata [[META7:![0-9]+]])
|
||||
// AMDGCNSPIRV-NEXT: br i1 [[TMP0]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
|
||||
// AMDGCNSPIRV: if.then:
|
||||
// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = call addrspace(4) i64 @llvm.amdgcn.s.memtime()
|
||||
// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[OUT_ADDR_ASCAST]], align 8
|
||||
// AMDGCNSPIRV-NEXT: store i64 [[TMP1]], ptr addrspace(4) [[TMP2]], align 8
|
||||
// AMDGCNSPIRV-NEXT: br label [[IF_END]]
|
||||
// AMDGCNSPIRV: if.end:
|
||||
// AMDGCNSPIRV-NEXT: ret void
|
||||
//
|
||||
__global__ void test_s_memtime(unsigned long long* out)
|
||||
{
|
||||
*out = __builtin_amdgcn_s_memtime();
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_memtime))
|
||||
*out = __builtin_amdgcn_s_memtime();
|
||||
}
|
||||
|
||||
// Check a generic pointer can be passed as a shared pointer and a generic pointer.
|
||||
@ -232,9 +385,32 @@ __device__ void func(float *x);
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw fmin ptr addrspace(3) [[TMP1]], float [[TMP2]] monotonic, align 4
|
||||
// CHECK-NEXT: store volatile float [[TMP3]], ptr addrspace(4) [[X_ASCAST]], align 4
|
||||
// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[SHARED_ADDR_ASCAST]], align 8
|
||||
// CHECK-NEXT: call spir_func addrspace(4) void @_Z4funcPf(ptr addrspace(4) noundef [[TMP4]]) #[[ATTR7:[0-9]+]]
|
||||
// CHECK-NEXT: call spir_func addrspace(4) void @_Z4funcPf(ptr addrspace(4) noundef [[TMP4]]) #[[ATTR8:[0-9]+]]
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
// AMDGCNSPIRV-LABEL: @_Z17test_ds_fmin_funcfPf(
|
||||
// AMDGCNSPIRV-NEXT: entry:
|
||||
// AMDGCNSPIRV-NEXT: [[SHARED:%.*]] = alloca ptr addrspace(4), align 8
|
||||
// AMDGCNSPIRV-NEXT: [[SRC_ADDR:%.*]] = alloca float, align 4
|
||||
// AMDGCNSPIRV-NEXT: [[SHARED_ADDR:%.*]] = alloca ptr addrspace(4), align 8
|
||||
// AMDGCNSPIRV-NEXT: [[X:%.*]] = alloca float, align 4
|
||||
// AMDGCNSPIRV-NEXT: [[SHARED_ASCAST:%.*]] = addrspacecast ptr [[SHARED]] to ptr addrspace(4)
|
||||
// AMDGCNSPIRV-NEXT: [[SRC_ADDR_ASCAST:%.*]] = addrspacecast ptr [[SRC_ADDR]] to ptr addrspace(4)
|
||||
// AMDGCNSPIRV-NEXT: [[SHARED_ADDR_ASCAST:%.*]] = addrspacecast ptr [[SHARED_ADDR]] to ptr addrspace(4)
|
||||
// AMDGCNSPIRV-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr [[X]] to ptr addrspace(4)
|
||||
// AMDGCNSPIRV-NEXT: store ptr addrspace(1) [[SHARED_COERCE:%.*]], ptr addrspace(4) [[SHARED_ASCAST]], align 8
|
||||
// AMDGCNSPIRV-NEXT: [[SHARED1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[SHARED_ASCAST]], align 8
|
||||
// AMDGCNSPIRV-NEXT: store float [[SRC:%.*]], ptr addrspace(4) [[SRC_ADDR_ASCAST]], align 4
|
||||
// AMDGCNSPIRV-NEXT: store ptr addrspace(4) [[SHARED1]], ptr addrspace(4) [[SHARED_ADDR_ASCAST]], align 8
|
||||
// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[SHARED_ADDR_ASCAST]], align 8
|
||||
// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[TMP0]] to ptr addrspace(3)
|
||||
// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(4) [[SRC_ADDR_ASCAST]], align 4
|
||||
// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw fmin ptr addrspace(3) [[TMP1]], float [[TMP2]] monotonic, align 4
|
||||
// AMDGCNSPIRV-NEXT: store volatile float [[TMP3]], ptr addrspace(4) [[X_ASCAST]], align 4
|
||||
// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[SHARED_ADDR_ASCAST]], align 8
|
||||
// AMDGCNSPIRV-NEXT: call spir_func addrspace(4) void @_Z4funcPf(ptr addrspace(4) noundef [[TMP4]]) #[[ATTR8:[0-9]+]]
|
||||
// AMDGCNSPIRV-NEXT: ret void
|
||||
//
|
||||
__global__ void test_ds_fmin_func(float src, float *__restrict shared) {
|
||||
volatile float x = __builtin_amdgcn_ds_fminf(shared, src, 0, 0, false);
|
||||
func(shared);
|
||||
@ -258,6 +434,24 @@ __global__ void test_ds_fmin_func(float src, float *__restrict shared) {
|
||||
// CHECK-NEXT: store i8 [[STOREDV]], ptr addrspace(4) [[RET_ASCAST]], align 1
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
// AMDGCNSPIRV-LABEL: @_Z14test_is_sharedPf(
|
||||
// AMDGCNSPIRV-NEXT: entry:
|
||||
// AMDGCNSPIRV-NEXT: [[X:%.*]] = alloca ptr addrspace(4), align 8
|
||||
// AMDGCNSPIRV-NEXT: [[X_ADDR:%.*]] = alloca ptr addrspace(4), align 8
|
||||
// AMDGCNSPIRV-NEXT: [[RET:%.*]] = alloca i8, align 1
|
||||
// AMDGCNSPIRV-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr [[X]] to ptr addrspace(4)
|
||||
// AMDGCNSPIRV-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr [[X_ADDR]] to ptr addrspace(4)
|
||||
// AMDGCNSPIRV-NEXT: [[RET_ASCAST:%.*]] = addrspacecast ptr [[RET]] to ptr addrspace(4)
|
||||
// AMDGCNSPIRV-NEXT: store ptr addrspace(1) [[X_COERCE:%.*]], ptr addrspace(4) [[X_ASCAST]], align 8
|
||||
// AMDGCNSPIRV-NEXT: [[X1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X_ASCAST]], align 8
|
||||
// AMDGCNSPIRV-NEXT: store ptr addrspace(4) [[X1]], ptr addrspace(4) [[X_ADDR_ASCAST]], align 8
|
||||
// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X_ADDR_ASCAST]], align 8
|
||||
// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[TMP0]] to ptr
|
||||
// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = call addrspace(4) i1 @llvm.amdgcn.is.shared(ptr [[TMP1]])
|
||||
// AMDGCNSPIRV-NEXT: [[STOREDV:%.*]] = zext i1 [[TMP2]] to i8
|
||||
// AMDGCNSPIRV-NEXT: store i8 [[STOREDV]], ptr addrspace(4) [[RET_ASCAST]], align 1
|
||||
// AMDGCNSPIRV-NEXT: ret void
|
||||
//
|
||||
__global__ void test_is_shared(float *x){
|
||||
bool ret = __builtin_amdgcn_is_shared(x);
|
||||
}
|
||||
@ -280,6 +474,24 @@ __global__ void test_is_shared(float *x){
|
||||
// CHECK-NEXT: store i8 [[STOREDV]], ptr addrspace(4) [[RET_ASCAST]], align 1
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
// AMDGCNSPIRV-LABEL: @_Z15test_is_privatePi(
|
||||
// AMDGCNSPIRV-NEXT: entry:
|
||||
// AMDGCNSPIRV-NEXT: [[X:%.*]] = alloca ptr addrspace(4), align 8
|
||||
// AMDGCNSPIRV-NEXT: [[X_ADDR:%.*]] = alloca ptr addrspace(4), align 8
|
||||
// AMDGCNSPIRV-NEXT: [[RET:%.*]] = alloca i8, align 1
|
||||
// AMDGCNSPIRV-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr [[X]] to ptr addrspace(4)
|
||||
// AMDGCNSPIRV-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr [[X_ADDR]] to ptr addrspace(4)
|
||||
// AMDGCNSPIRV-NEXT: [[RET_ASCAST:%.*]] = addrspacecast ptr [[RET]] to ptr addrspace(4)
|
||||
// AMDGCNSPIRV-NEXT: store ptr addrspace(1) [[X_COERCE:%.*]], ptr addrspace(4) [[X_ASCAST]], align 8
|
||||
// AMDGCNSPIRV-NEXT: [[X1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X_ASCAST]], align 8
|
||||
// AMDGCNSPIRV-NEXT: store ptr addrspace(4) [[X1]], ptr addrspace(4) [[X_ADDR_ASCAST]], align 8
|
||||
// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X_ADDR_ASCAST]], align 8
|
||||
// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[TMP0]] to ptr
|
||||
// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = call addrspace(4) i1 @llvm.amdgcn.is.private(ptr [[TMP1]])
|
||||
// AMDGCNSPIRV-NEXT: [[STOREDV:%.*]] = zext i1 [[TMP2]] to i8
|
||||
// AMDGCNSPIRV-NEXT: store i8 [[STOREDV]], ptr addrspace(4) [[RET_ASCAST]], align 1
|
||||
// AMDGCNSPIRV-NEXT: ret void
|
||||
//
|
||||
__global__ void test_is_private(int *x){
|
||||
bool ret = __builtin_amdgcn_is_private(x);
|
||||
}
|
||||
|
||||
@ -21,26 +21,30 @@ constexpr static bool BountCtrl()
|
||||
return true & false;
|
||||
}
|
||||
|
||||
// CHECK: call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %0, i32 %1, i32 16, i32 0, i32 0, i1 false)
|
||||
// CHECK: call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %2, i32 16, i32 0, i32 0, i1 false)
|
||||
__attribute__((global)) void test_update_dpp_const_fold_imm_operand_2(int* out, int a, int b)
|
||||
{
|
||||
*out = __builtin_amdgcn_update_dpp(a, b, OpCtrl(), 0, 0, false);
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_update_dpp))
|
||||
*out = __builtin_amdgcn_update_dpp(a, b, OpCtrl(), 0, 0, false);
|
||||
}
|
||||
|
||||
// CHECK: call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %0, i32 %1, i32 0, i32 4, i32 0, i1 false)
|
||||
// CHECK: call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %2, i32 0, i32 4, i32 0, i1 false)
|
||||
__attribute__((global)) void test_update_dpp_const_fold_imm_operand_3(int* out, int a, int b)
|
||||
{
|
||||
*out = __builtin_amdgcn_update_dpp(a, b, 0, RowMask(), 0, false);
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_update_dpp))
|
||||
*out = __builtin_amdgcn_update_dpp(a, b, 0, RowMask(), 0, false);
|
||||
}
|
||||
|
||||
// CHECK: call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %0, i32 %1, i32 0, i32 0, i32 3, i1 false)
|
||||
// CHECK: call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %2, i32 0, i32 0, i32 3, i1 false)
|
||||
__attribute__((global)) void test_update_dpp_const_fold_imm_operand_4(int* out, int a, int b)
|
||||
{
|
||||
*out = __builtin_amdgcn_update_dpp(a, b, 0, 0, BankMask(), false);
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_update_dpp))
|
||||
*out = __builtin_amdgcn_update_dpp(a, b, 0, 0, BankMask(), false);
|
||||
}
|
||||
|
||||
// CHECK: call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %0, i32 %1, i32 0, i32 0, i32 0, i1 false)
|
||||
// CHECK: call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %2, i32 0, i32 0, i32 0, i1 false)
|
||||
__attribute__((global)) void test_update_dpp_const_fold_imm_operand_5(int* out, int a, int b)
|
||||
{
|
||||
*out = __builtin_amdgcn_update_dpp(a, b, 0, 0, 0, BountCtrl());
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_update_dpp))
|
||||
*out = __builtin_amdgcn_update_dpp(a, b, 0, 0, 0, BountCtrl());
|
||||
}
|
||||
|
||||
@ -12,78 +12,88 @@ typedef unsigned long ulong;
|
||||
// CHECK-LABEL: @test_permlane16(
|
||||
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.permlane16.i32(i32 %a, i32 %b, i32 %c, i32 %d, i1 false, i1 false)
|
||||
void test_permlane16(global uint* out, uint a, uint b, uint c, uint d) {
|
||||
*out = __builtin_amdgcn_permlane16(a, b, c, d, 0, 0);
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_permlane16))
|
||||
*out = __builtin_amdgcn_permlane16(a, b, c, d, 0, 0);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_permlanex16(
|
||||
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.permlanex16.i32(i32 %a, i32 %b, i32 %c, i32 %d, i1 false, i1 false)
|
||||
void test_permlanex16(global uint* out, uint a, uint b, uint c, uint d) {
|
||||
*out = __builtin_amdgcn_permlanex16(a, b, c, d, 0, 0);
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_permlanex16))
|
||||
*out = __builtin_amdgcn_permlanex16(a, b, c, d, 0, 0);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_mov_dpp8_uint(
|
||||
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.mov.dpp8.i32(i32 %a, i32 1)
|
||||
// CHECK-NEXT: store i32 %0,
|
||||
// CHECK-NEXT: store i32 %[[#]],
|
||||
void test_mov_dpp8_uint(global uint* out, uint a) {
|
||||
*out = __builtin_amdgcn_mov_dpp8(a, 1);
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_mov_dpp8))
|
||||
*out = __builtin_amdgcn_mov_dpp8(a, 1);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_mov_dpp8_long(
|
||||
// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.mov.dpp8.i64(i64 %a, i32 1)
|
||||
// CHECK-NEXT: store i64 %0,
|
||||
// CHECK-NEXT: store i64 %[[#]],
|
||||
void test_mov_dpp8_long(global long* out, long a) {
|
||||
*out = __builtin_amdgcn_mov_dpp8(a, 1);
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_mov_dpp8))
|
||||
*out = __builtin_amdgcn_mov_dpp8(a, 1);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_mov_dpp8_float(
|
||||
// CHECK: %0 = bitcast float %a to i32
|
||||
// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.mov.dpp8.i32(i32 %0, i32 1)
|
||||
// CHECK-NEXT: store i32 %1,
|
||||
// CHECK: %[[BC:[0-9]+]] = bitcast float %a to i32
|
||||
// CHECK-NEXT: %[[DPP_RET:[0-9]+]] = tail call{{.*}} i32 @llvm.amdgcn.mov.dpp8.i32(i32 %[[BC]], i32 1)
|
||||
// CHECK-NEXT: store i32 %[[DPP_RET]],
|
||||
void test_mov_dpp8_float(global float* out, float a) {
|
||||
*out = __builtin_amdgcn_mov_dpp8(a, 1);
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_mov_dpp8))
|
||||
*out = __builtin_amdgcn_mov_dpp8(a, 1);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_mov_dpp8_double
|
||||
// CHECK: %0 = bitcast double %x to i64
|
||||
// CHECK-NEXT: %1 = tail call{{.*}} i64 @llvm.amdgcn.mov.dpp8.i64(i64 %0, i32 1)
|
||||
// CHECK-NEXT: store i64 %1,
|
||||
// CHECK: %[[BC:[0-9]+]] = bitcast double %x to i64
|
||||
// CHECK-NEXT: %[[DPP_RET:[0-9]+]] = tail call{{.*}} i64 @llvm.amdgcn.mov.dpp8.i64(i64 %[[BC]], i32 1)
|
||||
// CHECK-NEXT: store i64 %[[DPP_RET]],
|
||||
void test_mov_dpp8_double(double x, global double *p) {
|
||||
*p = __builtin_amdgcn_mov_dpp8(x, 1);
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_mov_dpp8))
|
||||
*p = __builtin_amdgcn_mov_dpp8(x, 1);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_mov_dpp8_short
|
||||
// CHECK: %0 = zext i16 %x to i32
|
||||
// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.mov.dpp8.i32(i32 %0, i32 1)
|
||||
// CHECK-NEXT: %2 = trunc i32 %1 to i16
|
||||
// CHECK-NEXT: store i16 %2,
|
||||
// CHECK: %[[ZEXT:[0-9]+]] = zext i16 %x to i32
|
||||
// CHECK-NEXT: %[[DPP_RET:[0-9]+]] = tail call{{.*}} i32 @llvm.amdgcn.mov.dpp8.i32(i32 %[[ZEXT]], i32 1)
|
||||
// CHECK-NEXT: %[[TRUNC:[0-9]+]] = trunc i32 %[[DPP_RET]] to i16
|
||||
// CHECK-NEXT: store i16 %[[TRUNC]],
|
||||
void test_mov_dpp8_short(short x, global short *p) {
|
||||
*p = __builtin_amdgcn_mov_dpp8(x, 1);
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_mov_dpp8))
|
||||
*p = __builtin_amdgcn_mov_dpp8(x, 1);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_mov_dpp8_char
|
||||
// CHECK: %0 = zext i8 %x to i32
|
||||
// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.mov.dpp8.i32(i32 %0, i32 1)
|
||||
// CHECK-NEXT: %2 = trunc i32 %1 to i8
|
||||
// CHECK-NEXT: store i8 %2,
|
||||
// CHECK: %[[ZEXT:[0-9]+]] = zext i8 %x to i32
|
||||
// CHECK-NEXT: %[[DPP_RET:[0-9]+]] = tail call{{.*}} i32 @llvm.amdgcn.mov.dpp8.i32(i32 %[[ZEXT]], i32 1)
|
||||
// CHECK-NEXT: %[[TRUNC:[0-9]+]] = trunc i32 %[[DPP_RET]] to i8
|
||||
// CHECK-NEXT: store i8 %[[TRUNC]],
|
||||
void test_mov_dpp8_char(char x, global char *p) {
|
||||
*p = __builtin_amdgcn_mov_dpp8(x, 1);
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_mov_dpp8))
|
||||
*p = __builtin_amdgcn_mov_dpp8(x, 1);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_mov_dpp8_half
|
||||
// CHECK: %0 = load i16,
|
||||
// CHECK: %1 = zext i16 %0 to i32
|
||||
// CHECK-NEXT: %2 = tail call{{.*}} i32 @llvm.amdgcn.mov.dpp8.i32(i32 %1, i32 1)
|
||||
// CHECK-NEXT: %3 = trunc i32 %2 to i16
|
||||
// CHECK-NEXT: store i16 %3,
|
||||
// CHECK: %[[LD:[0-9]+]] = load i16,
|
||||
// CHECK: %[[ZEXT:[0-9]+]] = zext i16 %[[LD]] to i32
|
||||
// CHECK-NEXT: %[[DPP_RET:[0-9]+]] = tail call{{.*}} i32 @llvm.amdgcn.mov.dpp8.i32(i32 %[[ZEXT]], i32 1)
|
||||
// CHECK-NEXT: %[[TRUNC:[0-9]+]] = trunc i32 %[[DPP_RET]] to i16
|
||||
// CHECK-NEXT: store i16 %[[TRUNC]],
|
||||
void test_mov_dpp8_half(half *x, global half *p) {
|
||||
*p = __builtin_amdgcn_mov_dpp8(*x, 1);
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_mov_dpp8))
|
||||
*p = __builtin_amdgcn_mov_dpp8(*x, 1);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_s_memtime
|
||||
// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.s.memtime()
|
||||
void test_s_memtime(global ulong* out)
|
||||
{
|
||||
*out = __builtin_amdgcn_s_memtime();
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_memtime))
|
||||
*out = __builtin_amdgcn_s_memtime();
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_groupstaticsize
|
||||
@ -97,5 +107,6 @@ void test_groupstaticsize(global uint* out)
|
||||
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.ballot.i32(i1 %{{.+}})
|
||||
void test_ballot_wave32(global uint* out, int a, int b)
|
||||
{
|
||||
*out = __builtin_amdgcn_ballot_w32(a == b);
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_ballot_w32))
|
||||
*out = __builtin_amdgcn_ballot_w32(a == b);
|
||||
}
|
||||
|
||||
@ -18,36 +18,41 @@ typedef uint uint4 __attribute__((ext_vector_type(4)));
|
||||
// CHECK-LABEL: @test_s_sendmsg_rtn(
|
||||
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.s.sendmsg.rtn.i32(i32 0)
|
||||
void test_s_sendmsg_rtn(global uint* out) {
|
||||
*out = __builtin_amdgcn_s_sendmsg_rtn(0);
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_sendmsg_rtn))
|
||||
*out = __builtin_amdgcn_s_sendmsg_rtn(0);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_s_sendmsg_rtnl(
|
||||
// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.s.sendmsg.rtn.i64(i32 0)
|
||||
void test_s_sendmsg_rtnl(global ulong* out) {
|
||||
*out = __builtin_amdgcn_s_sendmsg_rtnl(0);
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_sendmsg_rtnl))
|
||||
*out = __builtin_amdgcn_s_sendmsg_rtnl(0);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_ds_bvh_stack_rtn(
|
||||
// CHECK: %0 = tail call{{.*}} { i32, i32 } @llvm.amdgcn.ds.bvh.stack.rtn(i32 %addr, i32 %data, <4 x i32> %data1, i32 128)
|
||||
// CHECK: %1 = extractvalue { i32, i32 } %0, 0
|
||||
// CHECK: %2 = extractvalue { i32, i32 } %0, 1
|
||||
// CHECK: %3 = insertelement <2 x i32> poison, i32 %1, i64 0
|
||||
// CHECK: %4 = insertelement <2 x i32> %3, i32 %2, i64 1
|
||||
// CHECK: %[[#BVH_STACK:]] = tail call{{.*}} { i32, i32 } @llvm.amdgcn.ds.bvh.stack.rtn(i32 %addr, i32 %data, <4 x i32> %data1, i32 128)
|
||||
// CHECK: %[[#RET_FIRST:]] = extractvalue { i32, i32 } %[[#BVH_STACK]], 0
|
||||
// CHECK: %[[#RET_SECOND:]] = extractvalue { i32, i32 } %[[#BVH_STACK]], 1
|
||||
// CHECK: %[[#OUT:]] = insertelement <2 x i32> poison, i32 %[[#RET_FIRST]], i64 0
|
||||
// CHECK: %{{.*}} = insertelement <2 x i32> %[[#OUT]], i32 %[[#RET_SECOND]], i64 1
|
||||
void test_ds_bvh_stack_rtn(global uint2* out, uint addr, uint data, uint4 data1)
|
||||
{
|
||||
*out = __builtin_amdgcn_ds_bvh_stack_rtn(addr, data, data1, 128);
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_ds_bvh_stack_rtn))
|
||||
*out = __builtin_amdgcn_ds_bvh_stack_rtn(addr, data, data1, 128);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_permlane64(
|
||||
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.permlane64.i32(i32 %a)
|
||||
void test_permlane64(global uint* out, uint a) {
|
||||
*out = __builtin_amdgcn_permlane64(a);
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_permlane64))
|
||||
*out = __builtin_amdgcn_permlane64(a);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_s_wait_event_export_ready
|
||||
// CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.s.wait.event.export.ready
|
||||
void test_s_wait_event_export_ready() {
|
||||
__builtin_amdgcn_s_wait_event_export_ready();
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_wait_event_export_ready))
|
||||
__builtin_amdgcn_s_wait_event_export_ready();
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_global_add_f32
|
||||
@ -58,5 +63,6 @@ void test_global_add_f32(float *rtn, global float *addr, float x) {
|
||||
#else
|
||||
void test_global_add_f32(float *rtn, __attribute__((address_space(1))) float *addr, float x) {
|
||||
#endif
|
||||
*rtn = __builtin_amdgcn_global_atomic_fadd_f32(addr, x);
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_global_atomic_fadd_f32))
|
||||
*rtn = __builtin_amdgcn_global_atomic_fadd_f32(addr, x);
|
||||
}
|
||||
|
||||
@ -1,9 +1,9 @@
|
||||
// REQUIRES: amdgpu-registered-target
|
||||
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu tonga -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s
|
||||
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s
|
||||
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s
|
||||
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1012 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s
|
||||
// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,AMDGCNSPIRV %s
|
||||
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu tonga -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN --enable-var-scope %s
|
||||
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN --enable-var-scope %s
|
||||
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN --enable-var-scope %s
|
||||
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1012 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN --enable-var-scope %s
|
||||
// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,AMDGCNSPIRV --enable-var-scope %s
|
||||
|
||||
#define INVALID_MEMORY_SCOPE (__MEMORY_SCOPE_CLUSTR+1)
|
||||
|
||||
@ -16,42 +16,48 @@ typedef unsigned int uint;
|
||||
// CHECK: {{.*}}call{{.*}} half @llvm.amdgcn.div.fixup.f16
|
||||
void test_div_fixup_f16(global half* out, half a, half b, half c)
|
||||
{
|
||||
*out = __builtin_amdgcn_div_fixuph(a, b, c);
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_div_fixuph))
|
||||
*out = __builtin_amdgcn_div_fixuph(a, b, c);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_rcp_f16
|
||||
// CHECK: {{.*}}call{{.*}} half @llvm.amdgcn.rcp.f16
|
||||
void test_rcp_f16(global half* out, half a)
|
||||
{
|
||||
*out = __builtin_amdgcn_rcph(a);
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_rcph))
|
||||
*out = __builtin_amdgcn_rcph(a);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_sqrt_f16
|
||||
// CHECK: {{.*}}call{{.*}} half @llvm.{{((amdgcn.){0,1})}}sqrt.f16
|
||||
void test_sqrt_f16(global half* out, half a)
|
||||
{
|
||||
*out = __builtin_amdgcn_sqrth(a);
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_sqrth))
|
||||
*out = __builtin_amdgcn_sqrth(a);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_rsq_f16
|
||||
// CHECK: {{.*}}call{{.*}} half @llvm.amdgcn.rsq.f16
|
||||
void test_rsq_f16(global half* out, half a)
|
||||
{
|
||||
*out = __builtin_amdgcn_rsqh(a);
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_rsqh))
|
||||
*out = __builtin_amdgcn_rsqh(a);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_sin_f16
|
||||
// CHECK: {{.*}}call{{.*}} half @llvm.amdgcn.sin.f16
|
||||
void test_sin_f16(global half* out, half a)
|
||||
{
|
||||
*out = __builtin_amdgcn_sinh(a);
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_sinh))
|
||||
*out = __builtin_amdgcn_sinh(a);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_cos_f16
|
||||
// CHECK: {{.*}}call{{.*}} half @llvm.amdgcn.cos.f16
|
||||
void test_cos_f16(global half* out, half a)
|
||||
{
|
||||
*out = __builtin_amdgcn_cosh(a);
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_cosh))
|
||||
*out = __builtin_amdgcn_cosh(a);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_ldexp_f16
|
||||
@ -59,179 +65,202 @@ void test_cos_f16(global half* out, half a)
|
||||
// CHECK: {{.*}}call{{.*}} half @llvm.ldexp.f16.i16(half %a, i16 [[TRUNC]])
|
||||
void test_ldexp_f16(global half* out, half a, int b)
|
||||
{
|
||||
*out = __builtin_amdgcn_ldexph(a, b);
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_ldexph))
|
||||
*out = __builtin_amdgcn_ldexph(a, b);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_frexp_mant_f16
|
||||
// CHECK: {{.*}}call{{.*}} half @llvm.amdgcn.frexp.mant.f16
|
||||
void test_frexp_mant_f16(global half* out, half a)
|
||||
{
|
||||
*out = __builtin_amdgcn_frexp_manth(a);
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_frexp_manth))
|
||||
*out = __builtin_amdgcn_frexp_manth(a);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_frexp_exp_f16
|
||||
// CHECK: {{.*}}call{{.*}} i16 @llvm.amdgcn.frexp.exp.i16.f16
|
||||
void test_frexp_exp_f16(global short* out, half a)
|
||||
{
|
||||
*out = __builtin_amdgcn_frexp_exph(a);
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_frexp_exph))
|
||||
*out = __builtin_amdgcn_frexp_exph(a);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_fract_f16
|
||||
// CHECK: {{.*}}call{{.*}} half @llvm.amdgcn.fract.f16
|
||||
void test_fract_f16(global half* out, half a)
|
||||
{
|
||||
*out = __builtin_amdgcn_fracth(a);
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_fracth))
|
||||
*out = __builtin_amdgcn_fracth(a);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_class_f16
|
||||
// CHECK: {{.*}}call{{.*}} i1 @llvm.amdgcn.class.f16
|
||||
void test_class_f16(global half* out, half a, int b)
|
||||
{
|
||||
*out = __builtin_amdgcn_classh(a, b);
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_classh))
|
||||
*out = __builtin_amdgcn_classh(a, b);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_s_memrealtime
|
||||
// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.s.memrealtime()
|
||||
void test_s_memrealtime(global ulong* out)
|
||||
{
|
||||
*out = __builtin_amdgcn_s_memrealtime();
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_memrealtime))
|
||||
*out = __builtin_amdgcn_s_memrealtime();
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_s_dcache_wb()
|
||||
// CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.s.dcache.wb()
|
||||
void test_s_dcache_wb()
|
||||
{
|
||||
__builtin_amdgcn_s_dcache_wb();
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_dcache_wb))
|
||||
__builtin_amdgcn_s_dcache_wb();
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_mov_dpp_int
|
||||
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 %src, i32 0, i32 0, i32 0, i1 false)
|
||||
void test_mov_dpp_int(global int* out, int src)
|
||||
{
|
||||
*out = __builtin_amdgcn_mov_dpp(src, 0, 0, 0, false);
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_mov_dpp))
|
||||
*out = __builtin_amdgcn_mov_dpp(src, 0, 0, 0, false);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_mov_dpp_long
|
||||
// CHECK: %0 = tail call{{.*}} i64 @llvm.amdgcn.update.dpp.i64(i64 poison, i64 %x, i32 257, i32 15, i32 15, i1 false)
|
||||
// CHECK-NEXT: store i64 %0,
|
||||
// CHECK: %[[DPP_RET:[0-9]+]] = tail call{{.*}} i64 @llvm.amdgcn.update.dpp.i64(i64 poison, i64 %x, i32 257, i32 15, i32 15, i1 false)
|
||||
// CHECK-NEXT: store i64 %[[DPP_RET]],
|
||||
void test_mov_dpp_long(long x, global long *p) {
|
||||
*p = __builtin_amdgcn_mov_dpp(x, 0x101, 0xf, 0xf, 0);
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_mov_dpp))
|
||||
*p = __builtin_amdgcn_mov_dpp(x, 0x101, 0xf, 0xf, 0);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_mov_dpp_float
|
||||
// CHECK: %0 = bitcast float %x to i32
|
||||
// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 %0, i32 257, i32 15, i32 15, i1 false)
|
||||
// CHECK-NEXT: store i32 %1,
|
||||
// CHECK: %[[BC:[0-9]+]] = bitcast float %x to i32
|
||||
// CHECK-NEXT: %[[DPP_RET:[0-9]+]] = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 %[[BC]], i32 257, i32 15, i32 15, i1 false)
|
||||
// CHECK-NEXT: store i32 %[[DPP_RET]],
|
||||
void test_mov_dpp_float(float x, global float *p) {
|
||||
*p = __builtin_amdgcn_mov_dpp(x, 0x101, 0xf, 0xf, 0);
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_mov_dpp))
|
||||
*p = __builtin_amdgcn_mov_dpp(x, 0x101, 0xf, 0xf, 0);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_mov_dpp_double
|
||||
// CHECK: %0 = bitcast double %x to i64
|
||||
// CHECK-NEXT: %1 = tail call{{.*}} i64 @llvm.amdgcn.update.dpp.i64(i64 poison, i64 %0, i32 257, i32 15, i32 15, i1 false)
|
||||
// CHECK-NEXT: store i64 %1,
|
||||
// CHECK: %[[BC:[0-9]+]] = bitcast double %x to i64
|
||||
// CHECK-NEXT: %[[DPP_RET:[0-9]+]] = tail call{{.*}} i64 @llvm.amdgcn.update.dpp.i64(i64 poison, i64 %[[BC]], i32 257, i32 15, i32 15, i1 false)
|
||||
// CHECK-NEXT: store i64 %[[DPP_RET]],
|
||||
void test_mov_dpp_double(double x, global double *p) {
|
||||
*p = __builtin_amdgcn_mov_dpp(x, 0x101, 0xf, 0xf, 0);
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_mov_dpp))
|
||||
*p = __builtin_amdgcn_mov_dpp(x, 0x101, 0xf, 0xf, 0);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_mov_dpp_short
|
||||
// CHECK: %0 = zext i16 %x to i32
|
||||
// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 %0, i32 257, i32 15, i32 15, i1 false)
|
||||
// CHECK-NEXT: %2 = trunc i32 %1 to i16
|
||||
// CHECK-NEXT: store i16 %2,
|
||||
// CHECK: %[[ZEXT:[0-9]+]] = zext i16 %x to i32
|
||||
// CHECK-NEXT: %[[DPP_RET:[0-9]+]] = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 %[[ZEXT]], i32 257, i32 15, i32 15, i1 false)
|
||||
// CHECK-NEXT: %[[TRUNC:[0-9]+]] = trunc i32 %[[DPP_RET]] to i16
|
||||
// CHECK-NEXT: store i16 %[[TRUNC]],
|
||||
void test_mov_dpp_short(short x, global short *p) {
|
||||
*p = __builtin_amdgcn_mov_dpp(x, 0x101, 0xf, 0xf, 0);
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_mov_dpp))
|
||||
*p = __builtin_amdgcn_mov_dpp(x, 0x101, 0xf, 0xf, 0);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_mov_dpp_char
|
||||
// CHECK: %0 = zext i8 %x to i32
|
||||
// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 %0, i32 257, i32 15, i32 15, i1 false)
|
||||
// CHECK-NEXT: %2 = trunc i32 %1 to i8
|
||||
// CHECK-NEXT: store i8 %2,
|
||||
// CHECK: %[[ZEXT:[0-9]+]] = zext i8 %x to i32
|
||||
// CHECK-NEXT: %[[DPP_RET:[0-9]+]] = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 %[[ZEXT]], i32 257, i32 15, i32 15, i1 false)
|
||||
// CHECK-NEXT: %[[TRUNC:[0-9]+]] = trunc i32 %[[DPP_RET]] to i8
|
||||
// CHECK-NEXT: store i8 %[[TRUNC]],
|
||||
void test_mov_dpp_char(char x, global char *p) {
|
||||
*p = __builtin_amdgcn_mov_dpp(x, 0x101, 0xf, 0xf, 0);
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_mov_dpp))
|
||||
*p = __builtin_amdgcn_mov_dpp(x, 0x101, 0xf, 0xf, 0);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_mov_dpp_half
|
||||
// CHECK: %0 = load i16,
|
||||
// CHECK: %1 = zext i16 %0 to i32
|
||||
// CHECK-NEXT: %2 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 %1, i32 257, i32 15, i32 15, i1 false)
|
||||
// CHECK-NEXT: %3 = trunc i32 %2 to i16
|
||||
// CHECK-NEXT: store i16 %3,
|
||||
// CHECK: %[[LD:[0-9]+]] = load i16,
|
||||
// CHECK: %[[ZEXT:[0-9]+]] = zext i16 %[[LD]] to i32
|
||||
// CHECK-NEXT: %[[DPP_RET:[0-9]+]] = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 %[[ZEXT]], i32 257, i32 15, i32 15, i1 false)
|
||||
// CHECK-NEXT: %[[TRUNC:[0-9]+]] = trunc i32 %[[DPP_RET]] to i16
|
||||
// CHECK-NEXT: store i16 %[[TRUNC]],
|
||||
void test_mov_dpp_half(half *x, global half *p) {
|
||||
*p = __builtin_amdgcn_mov_dpp(*x, 0x101, 0xf, 0xf, 0);
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_mov_dpp))
|
||||
*p = __builtin_amdgcn_mov_dpp(*x, 0x101, 0xf, 0xf, 0);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_update_dpp_int
|
||||
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %arg1, i32 %arg2, i32 0, i32 0, i32 0, i1 false)
|
||||
void test_update_dpp_int(global int* out, int arg1, int arg2)
|
||||
{
|
||||
*out = __builtin_amdgcn_update_dpp(arg1, arg2, 0, 0, 0, false);
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_update_dpp))
|
||||
*out = __builtin_amdgcn_update_dpp(arg1, arg2, 0, 0, 0, false);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_update_dpp_long
|
||||
// CHECK: %0 = tail call{{.*}} i64 @llvm.amdgcn.update.dpp.i64(i64 %x, i64 %x, i32 257, i32 15, i32 15, i1 false)
|
||||
// CHECK-NEXT: store i64 %0,
|
||||
// CHECK: %[[DPP_RET:[0-9]+]] = tail call{{.*}} i64 @llvm.amdgcn.update.dpp.i64(i64 %x, i64 %x, i32 257, i32 15, i32 15, i1 false)
|
||||
// CHECK-NEXT: store i64 %[[DPP_RET]],
|
||||
void test_update_dpp_long(long x, global long *p) {
|
||||
*p = __builtin_amdgcn_update_dpp(x, x, 0x101, 0xf, 0xf, 0);
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_update_dpp))
|
||||
*p = __builtin_amdgcn_update_dpp(x, x, 0x101, 0xf, 0xf, 0);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_update_dpp_float
|
||||
// CHECK: %0 = bitcast float %x to i32
|
||||
// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %0, i32 %0, i32 257, i32 15, i32 15, i1 false)
|
||||
// CHECK-NEXT: store i32 %1,
|
||||
// CHECK: %[[BC:[0-9]+]] = bitcast float %x to i32
|
||||
// CHECK-NEXT: %[[DPP_RET:[0-9]+]] = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %[[BC]], i32 %[[BC]], i32 257, i32 15, i32 15, i1 false)
|
||||
// CHECK-NEXT: store i32 %[[DPP_RET]],
|
||||
void test_update_dpp_float(float x, global float *p) {
|
||||
*p = __builtin_amdgcn_update_dpp(x, x, 0x101, 0xf, 0xf, 0);
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_update_dpp))
|
||||
*p = __builtin_amdgcn_update_dpp(x, x, 0x101, 0xf, 0xf, 0);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_update_dpp_double
|
||||
// CHECK: %0 = bitcast double %x to i64
|
||||
// CHECK-NEXT: %1 = tail call{{.*}} i64 @llvm.amdgcn.update.dpp.i64(i64 %0, i64 %0, i32 257, i32 15, i32 15, i1 false)
|
||||
// CHECK-NEXT: store i64 %1,
|
||||
// CHECK: %[[BC:[0-9]+]] = bitcast double %x to i64
|
||||
// CHECK-NEXT: %[[DPP_RET:[0-9]+]] = tail call{{.*}} i64 @llvm.amdgcn.update.dpp.i64(i64 %[[BC]], i64 %[[BC]], i32 257, i32 15, i32 15, i1 false)
|
||||
// CHECK-NEXT: store i64 %[[DPP_RET]],
|
||||
void test_update_dpp_double(double x, global double *p) {
|
||||
*p = __builtin_amdgcn_update_dpp(x, x, 0x101, 0xf, 0xf, 0);
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_update_dpp))
|
||||
*p = __builtin_amdgcn_update_dpp(x, x, 0x101, 0xf, 0xf, 0);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_update_dpp_short
|
||||
// CHECK: %0 = zext i16 %x to i32
|
||||
// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %0, i32 %0, i32 257, i32 15, i32 15, i1 false)
|
||||
// CHECK-NEXT: %2 = trunc i32 %1 to i16
|
||||
// CHECK-NEXT: store i16 %2,
|
||||
// CHECK: %[[ZEXT:[0-9]+]] = zext i16 %x to i32
|
||||
// CHECK-NEXT: %[[DPP_RET:[0-9]+]] = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %[[ZEXT]], i32 %[[ZEXT]], i32 257, i32 15, i32 15, i1 false)
|
||||
// CHECK-NEXT: %[[TRUNC:[0-9]+]] = trunc i32 %[[DPP_RET]] to i16
|
||||
// CHECK-NEXT: store i16 %[[TRUNC]],
|
||||
void test_update_dpp_short(short x, global short *p) {
|
||||
*p = __builtin_amdgcn_update_dpp(x, x, 0x101, 0xf, 0xf, 0);
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_update_dpp))
|
||||
*p = __builtin_amdgcn_update_dpp(x, x, 0x101, 0xf, 0xf, 0);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_update_dpp_char
|
||||
// CHECK: %0 = zext i8 %x to i32
|
||||
// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %0, i32 %0, i32 257, i32 15, i32 15, i1 false)
|
||||
// CHECK-NEXT: %2 = trunc i32 %1 to i8
|
||||
// CHECK-NEXT: store i8 %2,
|
||||
// CHECK: %[[ZEXT:[0-9]+]] = zext i8 %x to i32
|
||||
// CHECK-NEXT: %[[DPP_RET:[0-9]+]] = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %[[ZEXT]], i32 %[[ZEXT]], i32 257, i32 15, i32 15, i1 false)
|
||||
// CHECK-NEXT: %[[TRUNC:[0-9]+]] = trunc i32 %[[DPP_RET]] to i8
|
||||
// CHECK-NEXT: store i8 %[[TRUNC]],
|
||||
void test_update_dpp_char(char x, global char *p) {
|
||||
*p = __builtin_amdgcn_update_dpp(x, x, 0x101, 0xf, 0xf, 0);
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_update_dpp))
|
||||
*p = __builtin_amdgcn_update_dpp(x, x, 0x101, 0xf, 0xf, 0);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_update_dpp_half
|
||||
// CHECK: %0 = load i16,
|
||||
// CHECK: %1 = zext i16 %0 to i32
|
||||
// CHECK-NEXT: %2 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %1, i32 257, i32 15, i32 15, i1 false)
|
||||
// CHECK-NEXT: %3 = trunc i32 %2 to i16
|
||||
// CHECK-NEXT: store i16 %3,
|
||||
// CHECK: %[[LD:[0-9]+]] = load i16,
|
||||
// CHECK: %[[ZEXT:[0-9]+]] = zext i16 %[[LD]] to i32
|
||||
// CHECK-NEXT: %[[DPP_RET:[0-9]+]] = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %[[ZEXT]], i32 %[[ZEXT]], i32 257, i32 15, i32 15, i1 false)
|
||||
// CHECK-NEXT: %[[TRUNC:[0-9]+]] = trunc i32 %[[DPP_RET]] to i16
|
||||
// CHECK-NEXT: store i16 %[[TRUNC]],
|
||||
void test_update_dpp_half(half *x, global half *p) {
|
||||
*p = __builtin_amdgcn_update_dpp(*x, *x, 0x101, 0xf, 0xf, 0);
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_update_dpp))
|
||||
*p = __builtin_amdgcn_update_dpp(*x, *x, 0x101, 0xf, 0xf, 0);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_update_dpp_int_uint
|
||||
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %arg1, i32 %arg2, i32 0, i32 0, i32 0, i1 false)
|
||||
void test_update_dpp_int_uint(global int* out, int arg1, unsigned int arg2)
|
||||
{
|
||||
*out = __builtin_amdgcn_update_dpp(arg1, arg2, 0, 0, 0, false);
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_update_dpp))
|
||||
*out = __builtin_amdgcn_update_dpp(arg1, arg2, 0, 0, 0, false);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_update_dpp_lit_int
|
||||
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 5, i32 %arg1, i32 0, i32 0, i32 0, i1 false)
|
||||
void test_update_dpp_lit_int(global int* out, int arg1)
|
||||
{
|
||||
*out = __builtin_amdgcn_update_dpp(5, arg1, 0, 0, 0, false);
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_update_dpp))
|
||||
*out = __builtin_amdgcn_update_dpp(5, arg1, 0, 0, 0, false);
|
||||
}
|
||||
|
||||
__constant int gi = 5;
|
||||
@ -240,7 +269,8 @@ __constant int gi = 5;
|
||||
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 5, i32 %arg1, i32 0, i32 0, i32 0, i1 false)
|
||||
void test_update_dpp_const_int(global int* out, int arg1)
|
||||
{
|
||||
*out = __builtin_amdgcn_update_dpp(gi, arg1, 0, 0, 0, false);
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_update_dpp))
|
||||
*out = __builtin_amdgcn_update_dpp(gi, arg1, 0, 0, 0, false);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_ds_fadd
|
||||
@ -397,14 +427,16 @@ void test_ds_fmaxf(__attribute__((address_space(3))) float *out, float src) {
|
||||
// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.s.memtime()
|
||||
void test_s_memtime(global ulong* out)
|
||||
{
|
||||
*out = __builtin_amdgcn_s_memtime();
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_memtime))
|
||||
*out = __builtin_amdgcn_s_memtime();
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_perm
|
||||
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.perm(i32 %a, i32 %b, i32 %s)
|
||||
void test_perm(global uint* out, uint a, uint b, uint s)
|
||||
{
|
||||
*out = __builtin_amdgcn_perm(a, b, s);
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_perm))
|
||||
*out = __builtin_amdgcn_perm(a, b, s);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_groupstaticsize
|
||||
|
||||
@ -1189,31 +1189,36 @@ kernel void test_ds_consume_lds(__attribute__((address_space(1))) int* out, __at
|
||||
// CHECK-LABEL: @test_gws_init(
|
||||
// CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.ds.gws.init(i32 %value, i32 %id)
|
||||
kernel void test_gws_init(uint value, uint id) {
|
||||
__builtin_amdgcn_ds_gws_init(value, id);
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_ds_gws_init))
|
||||
__builtin_amdgcn_ds_gws_init(value, id);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_gws_barrier(
|
||||
// CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.ds.gws.barrier(i32 %value, i32 %id)
|
||||
kernel void test_gws_barrier(uint value, uint id) {
|
||||
__builtin_amdgcn_ds_gws_barrier(value, id);
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_ds_gws_barrier))
|
||||
__builtin_amdgcn_ds_gws_barrier(value, id);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_gws_sema_v(
|
||||
// CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.ds.gws.sema.v(i32 %id)
|
||||
kernel void test_gws_sema_v(uint id) {
|
||||
__builtin_amdgcn_ds_gws_sema_v(id);
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_ds_gws_sema_v))
|
||||
__builtin_amdgcn_ds_gws_sema_v(id);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_gws_sema_br(
|
||||
// CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.ds.gws.sema.br(i32 %value, i32 %id)
|
||||
kernel void test_gws_sema_br(uint value, uint id) {
|
||||
__builtin_amdgcn_ds_gws_sema_br(value, id);
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_ds_gws_sema_br))
|
||||
__builtin_amdgcn_ds_gws_sema_br(value, id);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_gws_sema_p(
|
||||
// CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.ds.gws.sema.p(i32 %id)
|
||||
kernel void test_gws_sema_p(uint id) {
|
||||
__builtin_amdgcn_ds_gws_sema_p(id);
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_ds_gws_sema_p))
|
||||
__builtin_amdgcn_ds_gws_sema_p(id);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_mbcnt_lo(
|
||||
|
||||
62
clang/test/SemaHIP/amdgpu-feature-builtins-cant-jump.hip
Normal file
62
clang/test/SemaHIP/amdgpu-feature-builtins-cant-jump.hip
Normal file
@ -0,0 +1,62 @@
|
||||
// REQUIRES: amdgpu-registered-target
|
||||
// REQUIRES: spirv-registered-target
|
||||
// RUN: %clang_cc1 -fsyntax-only -verify -triple amdgcn -target-cpu gfx900 -Wno-unused-value %s
|
||||
// RUN: %clang_cc1 -fsyntax-only -verify -triple amdgcn -target-cpu gfx1201 -Wno-unused-value %s
|
||||
// RUN: %clang_cc1 -fsyntax-only -verify -triple spirv64-amd-amdhsa -Wno-unused-value %s
|
||||
// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64 -aux-triple amdgcn -Wno-unused-value %s
|
||||
// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64 -aux-triple spirv64-amd-amdhsa -Wno-unused-value %s
|
||||
|
||||
#define __device__ __attribute__((device))
|
||||
#define __global__ __attribute__((global))
|
||||
|
||||
__device__ void f(int *ptr, int size, bool f) {
|
||||
int i = 0;
|
||||
if (f)
|
||||
goto label; // expected-error {{cannot jump from this goto statement to its label}}
|
||||
|
||||
if (__builtin_amdgcn_processor_is("gfx900")) { // expected-note {{jump enters statement controlled by AMDGPU feature predicate}}
|
||||
for (i = 0; i < size; ++i) {
|
||||
label:
|
||||
ptr[i] = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__device__ void g(int *ptr, int size, bool f) {
|
||||
int i = 0;
|
||||
if (f)
|
||||
goto label; // expected-error {{cannot jump from this goto statement to its label}}
|
||||
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_sleep_var)) { // expected-note {{jump enters statement controlled by AMDGPU feature predicate}}
|
||||
for (i = 0; i < size; ++i) {
|
||||
label:
|
||||
ptr[i] = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void h(int *ptr, int size, bool f) {
|
||||
int i = 0;
|
||||
if (f)
|
||||
goto label; // expected-error {{cannot jump from this goto statement to its label}}
|
||||
|
||||
if (__builtin_amdgcn_processor_is("gfx900")) { // expected-note {{jump enters statement controlled by AMDGPU feature predicate}}
|
||||
for (i = 0; i < size; ++i) {
|
||||
label:
|
||||
ptr[i] = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void i(int *ptr, int size, bool f) {
|
||||
int i = 0;
|
||||
if (f)
|
||||
goto label; // expected-error {{cannot jump from this goto statement to its label}}
|
||||
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_sleep_var)) { // expected-note {{jump enters statement controlled by AMDGPU feature predicate}}
|
||||
for (i = 0; i < size; ++i) {
|
||||
label:
|
||||
ptr[i] = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,31 @@
|
||||
// REQUIRES: amdgpu-registered-target
|
||||
// REQUIRES: spirv-registered-target
|
||||
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx900 -ast-dump -ast-dump-decl-types %s | FileCheck %s --strict-whitespace
|
||||
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1201 -ast-dump -ast-dump-decl-types %s | FileCheck %s --strict-whitespace
|
||||
// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -ast-dump -ast-dump-decl-types %s | FileCheck %s --strict-whitespace
|
||||
// RUN: %clang_cc1 -triple x86_64 -aux-triple amdgcn -ast-dump -ast-dump-decl-types %s | FileCheck %s --strict-whitespace
|
||||
// RUN: %clang_cc1 -triple x86_64 -aux-triple spirv64-amd-amdhsa -ast-dump -ast-dump-decl-types %s | FileCheck %s --strict-whitespace
|
||||
|
||||
__attribute__((device)) auto foo() {
|
||||
return __builtin_amdgcn_processor_is("gfx900");
|
||||
}
|
||||
|
||||
__attribute__((device)) decltype(auto) bar() {
|
||||
return __builtin_amdgcn_is_invocable(__builtin_amdgcn_s_sleep);
|
||||
}
|
||||
|
||||
// CHECK: |-TypedefDecl {{.*}} implicit __amdgpu_feature_predicate_t '__amdgpu_feature_predicate_t'
|
||||
// CHECK-NEXT: | `-BuiltinType {{.*}} '__amdgpu_feature_predicate_t'
|
||||
// CHECK-DAG: |-FunctionDecl {{.*}} foo '__amdgpu_feature_predicate_t ()'
|
||||
// CHECK-NEXT: |-CompoundStmt {{.*}}
|
||||
// CHECK-NEXT: | `-ReturnStmt {{.*}}
|
||||
// CHECK-NEXT: | `-CallExpr {{.*}} '__amdgpu_feature_predicate_t'
|
||||
// CHECK-NEXT: | |-ImplicitCastExpr {{.*}} '__amdgpu_feature_predicate_t (*)(const char *) noexcept'
|
||||
// CHECK-NEXT: | | `-DeclRefExpr {{.*}} Function {{.*}} '__builtin_amdgcn_processor_is' '__amdgpu_feature_predicate_t (const char *) noexcept'
|
||||
// CHECK-NEXT: | `-StringLiteral {{.*}} "gfx900"
|
||||
// CHECK-DAG: |-FunctionDecl {{.*}} bar '__amdgpu_feature_predicate_t ()'
|
||||
// CHECK-NEXT: |-CompoundStmt {{.*}}
|
||||
// CHECK-NEXT: | `-ReturnStmt {{.*}}
|
||||
// CHECK-NEXT: | `-CallExpr {{.*}} '__amdgpu_feature_predicate_t'
|
||||
// CHECK-NEXT: | |-ImplicitCastExpr {{.*}} '__amdgpu_feature_predicate_t (*)() noexcept' <BuiltinFnToFnPtr>
|
||||
// CHECK-NEXT: | | `-DeclRefExpr {{.*}} Function {{.*}} '__builtin_amdgcn_is_invocable' '__amdgpu_feature_predicate_t () noexcept'
|
||||
40
clang/test/SemaHIP/amdgpu-feature-predicate.hip
Normal file
40
clang/test/SemaHIP/amdgpu-feature-predicate.hip
Normal file
@ -0,0 +1,40 @@
|
||||
// REQUIRES: amdgpu-registered-target
|
||||
// REQUIRED: spirv-registered-target
|
||||
// RUN: %clang_cc1 -fsyntax-only -verify -triple amdgcn -Wno-unused-value %s
|
||||
// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64 -aux-triple amdgcn -Wno-unused-value %s
|
||||
// RUN: %clang_cc1 -fsyntax-only -verify -triple spirv64-amd-amdhsa -Wno-unused-value %s
|
||||
// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64 -aux-triple spirv64-amd-amdhsa -Wno-unused-value %s
|
||||
|
||||
#define __device__ __attribute__((device))
|
||||
#define __global__ __attribute__((global))
|
||||
|
||||
__device__ void foo(__amdgpu_feature_predicate_t l) {
|
||||
decltype(__builtin_amdgcn_processor_is("gfx900")) what; // expected-error {{'what' has type __amdgpu_feature_predicate_t, which is not constructible}}
|
||||
typeof(__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_sleep)) why; // expected-error {{'why' has type __amdgpu_feature_predicate_t, which is not constructible}}
|
||||
|
||||
bool b = true;
|
||||
__amdgpu_feature_predicate_t v = false; // expected-error {{'v' has type __amdgpu_feature_predicate_t, which is not constructible}}
|
||||
static_cast<__amdgpu_feature_predicate_t>(b); // expected-error {{static_cast from 'bool' to '__amdgpu_feature_predicate_t' is not allowed}}
|
||||
dynamic_cast<__amdgpu_feature_predicate_t>(b); // expected-error {{invalid target type '__amdgpu_feature_predicate_t' for dynamic_cast; target type must be a reference or pointer type to a defined class}}
|
||||
reinterpret_cast<__amdgpu_feature_predicate_t>(b); // expected-error {{reinterpret_cast from 'bool' to '__amdgpu_feature_predicate_t' is not allowed}}
|
||||
__amdgpu_feature_predicate_t k; // expected-error {{'k' has type __amdgpu_feature_predicate_t, which is not constructible}}
|
||||
int *ip = (int *)l; // expected-error {{cannot cast from type '__amdgpu_feature_predicate_t' to pointer type 'int *'}}
|
||||
void *vp = (void *)l; // expected-error {{cannot cast from type '__amdgpu_feature_predicate_t' to pointer type 'void *'}}
|
||||
}
|
||||
|
||||
__global__ void bar(__amdgpu_feature_predicate_t l) {
|
||||
decltype(__builtin_amdgcn_processor_is("gfx900")) what; // expected-error {{'what' has type __amdgpu_feature_predicate_t, which is not constructible}}
|
||||
typeof(__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_sleep)) why; // expected-error {{'why' has type __amdgpu_feature_predicate_t, which is not constructible}}
|
||||
|
||||
bool b = true;
|
||||
__amdgpu_feature_predicate_t v = false; // expected-error {{'v' has type __amdgpu_feature_predicate_t, which is not constructible}}
|
||||
static_cast<__amdgpu_feature_predicate_t>(b); // expected-error {{static_cast from 'bool' to '__amdgpu_feature_predicate_t' is not allowed}}
|
||||
dynamic_cast<__amdgpu_feature_predicate_t>(b); // expected-error {{invalid target type '__amdgpu_feature_predicate_t' for dynamic_cast; target type must be a reference or pointer type to a defined class}}
|
||||
reinterpret_cast<__amdgpu_feature_predicate_t>(b); // expected-error {{reinterpret_cast from 'bool' to '__amdgpu_feature_predicate_t' is not allowed}}
|
||||
__amdgpu_feature_predicate_t k; // expected-error {{'k' has type __amdgpu_feature_predicate_t, which is not constructible}}
|
||||
int *ip = (int *)l; // expected-error {{cannot cast from type '__amdgpu_feature_predicate_t' to pointer type 'int *'}}
|
||||
void *vp = (void *)l; // expected-error {{cannot cast from type '__amdgpu_feature_predicate_t' to pointer type 'void *'}}
|
||||
}
|
||||
|
||||
static_assert(sizeof(__amdgpu_feature_predicate_t) == 0); // expected-error {{invalid application of 'sizeof' to sizeless type '__amdgpu_feature_predicate_t'}}
|
||||
static_assert(alignof(__amdgpu_feature_predicate_t) == 0); // expected-error {{invalid application of 'alignof' to sizeless type '__amdgpu_feature_predicate_t'}};
|
||||
115
clang/test/SemaHIP/amdgpu-feature-predicates-guard-use.hip
Normal file
115
clang/test/SemaHIP/amdgpu-feature-predicates-guard-use.hip
Normal file
@ -0,0 +1,115 @@
|
||||
// REQUIRES: amdgpu-registered-target
|
||||
// REQUIRES: spirv-registered-target
|
||||
// RUN: %clang_cc1 -fsyntax-only -verify -triple spirv64-amd-amdhsa -Wamdgpu-unguarded-builtin-usage %s
|
||||
|
||||
#define __device__ __attribute__((device))
|
||||
#define __global__ __attribute__((global))
|
||||
|
||||
__device__ void g();
|
||||
|
||||
__device__ void f(int x, bool b) {
|
||||
long v15_16;
|
||||
__asm volatile("v_lshlrev_b64 v[15:16], 0, %0" : "={v[15:16]}"(v15_16) : "v"(x)); // expected-warning {{the 'v_lshlrev_b64 v[15:16], 0, $0' ASM sequence might be invalid for some AMDGPU targets}}
|
||||
// expected-note@-1 {{enclose the 'v_lshlrev_b64 v[15:16], 0, $0' ASM sequence in a scope controlled by a __builtin_amdgcn_is_processor check to silence this warning}}
|
||||
|
||||
if (__builtin_amdgcn_processor_is("gfx90a")) {
|
||||
long v15_16;
|
||||
__asm volatile("v_lshlrev_b64 v[15:16], 0, %0" : "={v[15:16]}"(v15_16) : "v"(x));
|
||||
}
|
||||
|
||||
if (!__builtin_amdgcn_processor_is("gfx90a")) {
|
||||
long v15_16;
|
||||
__asm volatile("v_lshlrev_b64 v[15:16], 0, %0" : "={v[15:16]}"(v15_16) : "v"(x)); // expected-warning {{the 'v_lshlrev_b64 v[15:16], 0, $0' ASM sequence might be invalid for some AMDGPU targets}}
|
||||
// expected-note@-1 {{enclose the 'v_lshlrev_b64 v[15:16], 0, $0' ASM sequence in a scope controlled by a __builtin_amdgcn_is_processor check to silence this warning}}
|
||||
}
|
||||
|
||||
__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_sleep_var) ? __builtin_amdgcn_s_sleep_var(x) : __builtin_trap();
|
||||
!__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_sleep_var) ? __builtin_amdgcn_s_sleep_var(x) : __builtin_trap(); // expected-error {{'__builtin_amdgcn_s_sleep_var' cannot be invoked in the current context, as it requires the 'gfx12-insts' feature(s)}}
|
||||
|
||||
const auto lambda = [=] __device__ () {
|
||||
__builtin_amdgcn_s_sleep(42); // expected-warning {{'__builtin_amdgcn_s_sleep' might be unavailable on some AMDGPU targets}}
|
||||
// expected-note@-1 {{enclose '__builtin_amdgcn_s_sleep' in a __builtin_amdgcn_is_invocable check to silence this warning}}
|
||||
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_sleep_var))
|
||||
__builtin_amdgcn_s_sleep_var(x);
|
||||
};
|
||||
|
||||
const auto generic_lambda = [] __device__ (auto&& y) {
|
||||
__builtin_amdgcn_s_sleep(42); // expected-warning {{'__builtin_amdgcn_s_sleep' might be unavailable on some AMDGPU targets}}
|
||||
// expected-note@-1 {{enclose '__builtin_amdgcn_s_sleep' in a __builtin_amdgcn_is_invocable check to silence this warning}}
|
||||
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_sleep_var)) {
|
||||
__builtin_amdgcn_s_sleep_var(y);
|
||||
// Has the same requirements - gfx12-insts, thus correct, but we should still warn.
|
||||
__builtin_amdgcn_s_barrier_signal_var(nullptr, y); // expected-warning {{'__builtin_amdgcn_s_barrier_signal_var' might be unavailable on some AMDGPU targets}}
|
||||
// expected-note@-1 {{enclose '__builtin_amdgcn_s_barrier_signal_var' in a __builtin_amdgcn_is_invocable check to silence this warning}}
|
||||
}
|
||||
};
|
||||
|
||||
__builtin_amdgcn_s_sleep(42); // expected-warning {{'__builtin_amdgcn_s_sleep' might be unavailable on some AMDGPU targets}}
|
||||
// expected-note@-1 {{enclose '__builtin_amdgcn_s_sleep' in a __builtin_amdgcn_is_invocable check to silence this warning}}
|
||||
|
||||
if (__builtin_amdgcn_processor_is("gfx1201")) {
|
||||
if (__builtin_amdgcn_processor_is("gfx906")) // expected-error {{conflicting check for AMDGCN processor '__builtin_amdgcn_processor_is("gfx906")' found in a scope already controlled by a check for AMDGCN processor}}
|
||||
// expected-note@-2 {{predicate guard, with establishes the context, inserted here}}
|
||||
__builtin_trap();
|
||||
}
|
||||
|
||||
if (__builtin_amdgcn_processor_is("gfx900")) {
|
||||
if (__builtin_amdgcn_processor_is("gfx900")) // This is fine, albeit potentially spurious.
|
||||
++x;
|
||||
}
|
||||
|
||||
if (__builtin_amdgcn_processor_is("gfx1030"))
|
||||
__builtin_amdgcn_s_barrier_signal_isfirst(42); // expected-error {{'__builtin_amdgcn_s_barrier_signal_isfirst' cannot be invoked in the current context, as it requires the 'gfx12-insts' feature(s), which 'gfx1030' does not provide}}
|
||||
// expected-note@-2 {{predicate guard, with establishes the context, inserted here}}
|
||||
|
||||
// Direct guard
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_sleep))
|
||||
__builtin_amdgcn_s_sleep(42);
|
||||
|
||||
// Guarded scope
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_sleep_var)) {
|
||||
if (b) {
|
||||
g();
|
||||
while (--x > 42)
|
||||
__builtin_amdgcn_s_sleep_var(x);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__attribute__((target("arch=gfx1030")))
|
||||
__device__ void h(int x) {
|
||||
if (__builtin_amdgcn_processor_is("gfx1030")) // Fine, same processor
|
||||
return;
|
||||
|
||||
long v15_16;
|
||||
__asm volatile("v_lshlrev_b64 v[15:16], 0, %0" : "={v[15:16]}"(v15_16) : "v"(x)); // "Fine", explicit gfx target
|
||||
|
||||
__builtin_amdgcn_s_ttracedata_imm(42); // expected-warning {{'__builtin_amdgcn_s_ttracedata_imm' might be unavailable on some AMDGPU targets}}
|
||||
// expected-note@-1 {{enclose '__builtin_amdgcn_s_ttracedata_imm' in a __builtin_amdgcn_is_invocable check to silence this warning}}
|
||||
|
||||
__builtin_amdgcn_s_barrier_signal_isfirst(42); // expected-error {{'__builtin_amdgcn_s_barrier_signal_isfirst' cannot be invoked in the current context, as it requires the 'gfx12-insts' feature(s), which 'gfx1030' does not provide}}
|
||||
// expected-note@-12 {{predicate guard, with establishes the context, inserted here}}
|
||||
|
||||
if (__builtin_amdgcn_processor_is("gfx906")) // expected-error {{conflicting check for AMDGCN processor '__builtin_amdgcn_processor_is("gfx906")' found in a scope already controlled by a check for AMDGCN processor}}
|
||||
// expected-note@-15 {{predicate guard, with establishes the context, inserted here}}
|
||||
__builtin_trap();
|
||||
}
|
||||
|
||||
__attribute__((target("gfx11-insts")))
|
||||
__device__ void i(int x) {
|
||||
__builtin_amdgcn_s_wait_event_export_ready(); // expected-warning {{'__builtin_amdgcn_s_wait_event_export_ready' might be unavailable on some AMDGPU targets}}
|
||||
// expected-note@-1 {{enclose '__builtin_amdgcn_s_wait_event_export_ready' in a __builtin_amdgcn_is_invocable check to silence this warning}}
|
||||
|
||||
__builtin_amdgcn_s_barrier_signal_isfirst(42); // expected-error {{'__builtin_amdgcn_s_barrier_signal_isfirst' cannot be invoked in the current context, as it requires the 'gfx12-insts' feature(s)}}
|
||||
}
|
||||
|
||||
__attribute__((target("gfx11-insts,gfx12-insts")))
|
||||
__device__ void j(int x) {
|
||||
__builtin_amdgcn_s_wait_event_export_ready(); // expected-warning {{'__builtin_amdgcn_s_wait_event_export_ready' might be unavailable on some AMDGPU targets}}
|
||||
// expected-note@-1 {{enclose '__builtin_amdgcn_s_wait_event_export_ready' in a __builtin_amdgcn_is_invocable check to silence this warning}}
|
||||
|
||||
__builtin_amdgcn_s_barrier_signal_isfirst(42); // expected-warning {{'__builtin_amdgcn_s_barrier_signal_isfirst' might be unavailable on some AMDGPU targets}}
|
||||
// expected-note@-1 {{enclose '__builtin_amdgcn_s_barrier_signal_isfirst' in a __builtin_amdgcn_is_invocable check to silence this warning}}
|
||||
}
|
||||
21
clang/test/SemaHIP/amdgpu-is-invocable.hip
Normal file
21
clang/test/SemaHIP/amdgpu-is-invocable.hip
Normal file
@ -0,0 +1,21 @@
|
||||
// REQUIRES: amdgpu-registered-target
|
||||
// REQUIRES: spirv-registered-target
|
||||
// RUN: %clang_cc1 -fsyntax-only -verify -triple amdgcn -Wno-unused-value %s
|
||||
// RUN: %clang_cc1 -fsyntax-only -verify -triple spirv64-amd-amdhsa -Wno-unused-value %s
|
||||
// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64 -aux-triple amdgcn -Wno-unused-value %s
|
||||
// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64 -aux-triple spirv64-amd-amdhsa -Wno-unused-value %s
|
||||
|
||||
// expected-no-diagnostics
|
||||
|
||||
#define __device__ __attribute__((device))
|
||||
#define __global__ __attribute__((global))
|
||||
|
||||
__device__ void foo() {
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_permlanex16))
|
||||
return __builtin_trap();
|
||||
}
|
||||
|
||||
__global__ void bar() {
|
||||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_permlanex16))
|
||||
return __builtin_trap();
|
||||
}
|
||||
21
clang/test/SemaHIP/amdgpu-processor-is.hip
Normal file
21
clang/test/SemaHIP/amdgpu-processor-is.hip
Normal file
@ -0,0 +1,21 @@
|
||||
// REQUIRES: amdgpu-registered-target
|
||||
// REQUIRES: spirv-registered-target
|
||||
// RUN: %clang_cc1 -fsyntax-only -verify -triple amdgcn -Wno-unused-value %s
|
||||
// RUN: %clang_cc1 -fsyntax-only -verify -triple spirv64-amd-amdhsa -Wno-unused-value %s
|
||||
// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64 -aux-triple amdgcn -Wno-unused-value %s
|
||||
// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64 -aux-triple spirv64-amd-amdhsa -Wno-unused-value %s
|
||||
|
||||
// expected-no-diagnostics
|
||||
|
||||
#define __device__ __attribute__((device))
|
||||
#define __global__ __attribute__((global))
|
||||
|
||||
__device__ void foo() {
|
||||
if (__builtin_amdgcn_processor_is("gfx900"))
|
||||
return __builtin_trap();
|
||||
}
|
||||
|
||||
__global__ void bar() {
|
||||
if (__builtin_amdgcn_processor_is("gfx900"))
|
||||
return __builtin_trap();
|
||||
}
|
||||
@ -345,6 +345,7 @@ private:
|
||||
.Case("__fp16", "h")
|
||||
.Case("__hlsl_resource_t", "Qr")
|
||||
.Case("__amdgpu_buffer_rsrc_t", "Qb")
|
||||
.Case("__amdgpu_feature_predicate_t", "Qc")
|
||||
.Case("__amdgpu_texture_t", "Qt")
|
||||
.Case("__int128_t", "LLLi")
|
||||
.Case("_Float16", "x")
|
||||
|
||||
@ -2403,11 +2403,6 @@ static bool generateSpecConstantInst(const SPIRV::IncomingCall *Call,
|
||||
|
||||
switch (Opcode) {
|
||||
case SPIRV::OpSpecConstant: {
|
||||
// Build the SpecID decoration.
|
||||
unsigned SpecId =
|
||||
static_cast<unsigned>(getIConstVal(Call->Arguments[0], MRI));
|
||||
buildOpDecorate(Call->ReturnRegister, MIRBuilder, SPIRV::Decoration::SpecId,
|
||||
{SpecId});
|
||||
// Determine the constant MI.
|
||||
Register ConstRegister = Call->Arguments[1];
|
||||
const MachineInstr *Const = getDefInstrMaybeConstant(ConstRegister, MRI);
|
||||
@ -2433,6 +2428,11 @@ static bool generateSpecConstantInst(const SPIRV::IncomingCall *Call,
|
||||
else
|
||||
addNumImm(ConstOperand.getFPImm()->getValueAPF().bitcastToAPInt(), MIB);
|
||||
}
|
||||
// Build the SpecID decoration.
|
||||
unsigned SpecId =
|
||||
static_cast<unsigned>(getIConstVal(Call->Arguments[0], MRI));
|
||||
buildOpDecorate(Call->ReturnRegister, MIRBuilder, SPIRV::Decoration::SpecId,
|
||||
{SpecId});
|
||||
return true;
|
||||
}
|
||||
case SPIRV::OpSpecConstantComposite: {
|
||||
|
||||
@ -4239,7 +4239,7 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
|
||||
.addUse(GR.getSPIRVTypeID(ResType));
|
||||
MIB.constrainAllUses(TII, TRI, RBI);
|
||||
unsigned SpecId = I.getOperand(2).getImm();
|
||||
buildOpDecorate(I.getOperand(0).getReg(), *MIB.getInstr(), TII,
|
||||
buildOpDecorate(I.getOperand(0).getReg(), *++MIB->getIterator(), TII,
|
||||
SPIRV::Decoration::SpecId, {SpecId});
|
||||
|
||||
return true;
|
||||
|
||||
@ -439,6 +439,9 @@ bool SPIRVPrepareFunctions::substituteIntrinsicCalls(Function *F) {
|
||||
if (!CF || !CF->isIntrinsic())
|
||||
continue;
|
||||
auto *II = cast<IntrinsicInst>(Call);
|
||||
if (Intrinsic::isTargetIntrinsic(II->getIntrinsicID()) &&
|
||||
II->getCalledOperand()->getName().starts_with("llvm.spv"))
|
||||
continue;
|
||||
switch (II->getIntrinsicID()) {
|
||||
case Intrinsic::memset:
|
||||
case Intrinsic::bswap:
|
||||
|
||||
@ -6,8 +6,13 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// The pass transforms IR globals that cannot be trivially mapped to SPIRV
|
||||
// into something that is trival to lower.
|
||||
// The pass:
|
||||
// - transforms IR globals that cannot be trivially mapped to SPIRV into
|
||||
// something that is trival to lower;
|
||||
// - for AMDGCN flavoured SPIRV, it assigns unique IDs to the specialisation
|
||||
// constants associated with feature predicates, which were inserted by the
|
||||
// FE when expanding calls to __builtin_amdgcn_processor_is or
|
||||
// __builtin_amdgcn_is_invocable
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
@ -15,9 +20,15 @@
|
||||
#include "SPIRVUtils.h"
|
||||
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
#include "llvm/ADT/StringExtras.h"
|
||||
#include "llvm/ADT/StringMap.h"
|
||||
#include "llvm/IR/IntrinsicsSPIRV.h"
|
||||
#include "llvm/IR/Module.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
|
||||
#include <climits>
|
||||
#include <string>
|
||||
|
||||
#define DEBUG_TYPE "spirv-prepare-globals"
|
||||
|
||||
using namespace llvm;
|
||||
@ -71,6 +82,51 @@ bool tryReplaceAliasWithAliasee(GlobalAlias &GA) {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool tryAssignPredicateSpecConstIDs(Module &M, Function *F) {
|
||||
StringMap<unsigned> IDs;
|
||||
for (auto &&U : F->users()) {
|
||||
auto *CI = dyn_cast<CallInst>(U);
|
||||
if (!CI)
|
||||
continue;
|
||||
|
||||
auto *SpecID = dyn_cast<ConstantInt>(CI->getArgOperand(0));
|
||||
if (!SpecID)
|
||||
continue;
|
||||
|
||||
unsigned ID = SpecID->getZExtValue();
|
||||
if (ID != UINT32_MAX)
|
||||
continue;
|
||||
|
||||
// Replace placeholder Specialisation Constant IDs with unique IDs
|
||||
// associated with the predicate being evaluated, which is encoded via
|
||||
// spv_assign_name.
|
||||
auto *MD =
|
||||
cast<MDNode>(cast<MetadataAsValue>(CI->getOperand(2))->getMetadata());
|
||||
auto *P = cast<MDString>(MD->getOperand(0));
|
||||
|
||||
ID = IDs.try_emplace(P->getString(), IDs.size()).first->second;
|
||||
CI->setArgOperand(0, ConstantInt::get(CI->getArgOperand(0)->getType(), ID));
|
||||
}
|
||||
|
||||
if (IDs.empty())
|
||||
return false;
|
||||
|
||||
// Store the predicate -> ID mapping as a fixed format string
|
||||
// (predicate ID\0...), for later use during SPIR-V consumption.
|
||||
std::string Tmp;
|
||||
for (auto &&[Predicate, SpecID] : IDs)
|
||||
Tmp.append(Predicate).append(" ").append(utostr(SpecID)).push_back('\0');
|
||||
|
||||
Constant *PredSpecIDStr =
|
||||
ConstantDataArray::getString(M.getContext(), Tmp, false);
|
||||
|
||||
new GlobalVariable(M, PredSpecIDStr->getType(), true,
|
||||
GlobalVariable::LinkageTypes::ExternalLinkage,
|
||||
PredSpecIDStr, "llvm.amdgcn.feature.predicate.ids");
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool SPIRVPrepareGlobals::runOnModule(Module &M) {
|
||||
bool Changed = false;
|
||||
|
||||
@ -78,6 +134,16 @@ bool SPIRVPrepareGlobals::runOnModule(Module &M) {
|
||||
Changed |= tryReplaceAliasWithAliasee(GA);
|
||||
}
|
||||
|
||||
if (M.getTargetTriple().getVendor() != Triple::AMD)
|
||||
return Changed;
|
||||
|
||||
// TODO: Currently, for AMDGCN flavoured SPIR-V, the symbol can only be
|
||||
// inserted via feature predicate use, but in the future this will need
|
||||
// revisiting if we start making more liberal use of the intrinsic.
|
||||
if (Function *F = Intrinsic::getDeclarationIfExists(
|
||||
&M, Intrinsic::spv_named_boolean_spec_constant))
|
||||
Changed |= tryAssignPredicateSpecConstIDs(M, F);
|
||||
|
||||
return Changed;
|
||||
}
|
||||
char SPIRVPrepareGlobals::ID = 0;
|
||||
|
||||
@ -0,0 +1,281 @@
|
||||
; RUN: llc -O0 --verify-machineinstrs -mtriple=spirv64-amd-amdhsa %s -o - | FileCheck %s
|
||||
; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-amd-amdhsa %s -o - -filetype=obj | spirv-val %}
|
||||
|
||||
; CHECK: OpName %[[#KERNEL:]] "kernel"
|
||||
; CHECK: OpName %[[#FEATURE_PREDICATE_IDS:]] "llvm.amdgcn.feature.predicate.ids"
|
||||
; CHECK: OpName %[[#SET_FPENV_I64:]] "spirv.llvm_set_fpenv_i64"
|
||||
; CHECK: OpName %[[#ASHR_PK_I8_I32:]] "spirv.llvm_amdgcn_ashr_pk_i8_i32"
|
||||
; CHECK: OpName %[[#S_SLEEP_VAR:]] "spirv.llvm_amdgcn_s_sleep_var"
|
||||
; CHECK: OpName %[[#S_WAIT_EVENT_EXPORT_READY:]] "spirv.llvm_amdgcn_s_wait_event_export_ready"
|
||||
; CHECK: OpName %[[#S_TTRACEDATA_IMM:]] "spirv.llvm_amdgcn_s_ttracedata_imm"
|
||||
; CHECK: OpDecorate %[[#IS_GFX950:]] SpecId 8
|
||||
; CHECK: OpDecorate %[[#IS_GFX1201:]] SpecId 3
|
||||
; CHECK: OpDecorate %[[#HAS_GFX12_INSTS:]] SpecId 7
|
||||
; CHECK: OpDecorate %[[#IS_GFX906:]] SpecId 6
|
||||
; CHECK: OpDecorate %[[#IS_GFX1010:]] SpecId 4
|
||||
; CHECK: OpDecorate %[[#IS_GFX1101:]] SpecId 5
|
||||
; CHECK: OpDecorate %[[#IS_GFX1101_1:]] SpecId 5
|
||||
; CHECK: OpDecorate %[[#IS_GFX1010_1:]] SpecId 4
|
||||
; CHECK: OpDecorate %[[#IS_GFX1201_1:]] SpecId 3
|
||||
; CHECK: OpDecorate %[[#HAS_GFX11_INSTS:]] SpecId 0
|
||||
; CHECK: OpDecorate %[[#HAS_GFX10_INSTS:]] SpecId 2
|
||||
; CHECK: OpDecorate %[[#HAS_GFX1250_INSTS:]] SpecId 1
|
||||
; CHECK: OpDecorate %[[#HAS_GFX11_INSTS_1:]] SpecId 0
|
||||
; CHECK: %[[#BOOL:]] = OpTypeBool
|
||||
; CHECK: %[[#UCHAR:]] = OpTypeInt 8
|
||||
; CHECK: %[[#FEATURE_PREDICATE_IDS_MAP_STRLEN:]] = OpConstant %[[#]] 137
|
||||
; CHECK: %[[#FEATURE_PREDICATE_IDS_MAP_STRTY:]] = OpTypeArray %[[#UCHAR]] %[[#FEATURE_PREDICATE_IDS_MAP_STRLEN]]
|
||||
; CHECK: %[[#FEATURE_PREDICATE_IDS_MAP_STRVAL:]] = OpConstantComposite %[[#FEATURE_PREDICATE_IDS_MAP_STRTY]]
|
||||
; CHECK: %[[#FEATURE_PREDICATE_IDS]] = OpVariable %[[#]] CrossWorkgroup %[[#FEATURE_PREDICATE_IDS_MAP_STRVAL]]
|
||||
; CHECK: %[[#IS_GFX950]] = OpSpecConstantFalse %[[#BOOL]]
|
||||
; CHECK: %[[#IS_GFX1201]] = OpSpecConstantFalse %[[#BOOL]]
|
||||
; CHECK: %[[#HAS_GFX12_INSTS]] = OpSpecConstantFalse %[[#BOOL]]
|
||||
; CHECK: %[[#IS_GFX906]] = OpSpecConstantFalse %[[#BOOL]]
|
||||
; CHECK: %[[#IS_GFX1010]] = OpSpecConstantFalse %[[#BOOL]]
|
||||
; CHECK: %[[#IS_GFX1101]] = OpSpecConstantFalse %[[#BOOL]]
|
||||
; CHECK: %[[#IS_GFX1101_1]] = OpSpecConstantFalse %[[#BOOL]]
|
||||
; CHECK: %[[#IS_GFX1010_1]] = OpSpecConstantFalse %[[#BOOL]]
|
||||
; CHECK: %[[#IS_GFX1201_1]] = OpSpecConstantFalse %[[#BOOL]]
|
||||
; CHECK: %[[#HAS_GFX11_INSTS]] = OpSpecConstantFalse %[[#BOOL]]
|
||||
; CHECK: %[[#HAS_GFX10_INSTS]] = OpSpecConstantFalse %[[#BOOL]]
|
||||
; CHECK: %[[#HAS_GFX1250_INSTS]] = OpSpecConstantFalse %[[#BOOL]]
|
||||
; CHECK: %[[#HAS_GFX11_INSTS_1]] = OpSpecConstantFalse %[[#BOOL]]
|
||||
|
||||
declare void @llvm.amdgcn.s.monitor.sleep(i16 immarg) addrspace(4)
|
||||
|
||||
declare void @llvm.amdgcn.s.sleep(i32 immarg) addrspace(4)
|
||||
|
||||
declare i1 @llvm.spv.named.boolean.spec.constant(i32, i1, metadata) addrspace(4)
|
||||
|
||||
declare i16 @llvm.amdgcn.ashr.pk.i8.i32(i32, i32, i32) addrspace(4) #3
|
||||
|
||||
declare void @llvm.set.fpenv.i64(i64) addrspace(4) #4
|
||||
|
||||
declare void @llvm.amdgcn.s.sleep.var(i32) addrspace(4) #5
|
||||
|
||||
declare void @llvm.amdgcn.s.wait.event.export.ready() addrspace(4) #5
|
||||
|
||||
declare void @llvm.amdgcn.s.ttracedata.imm(i16 immarg) addrspace(4) #6
|
||||
|
||||
@p = external addrspace(1) global i32
|
||||
@g = external addrspace(1) constant i32
|
||||
|
||||
define void @kernel() addrspace(4) {
|
||||
; CHECK-DAG: %[[#KERNEL]] = OpFunction %39 None %40 ; -- Begin function kernel
|
||||
; CHECK-NEXT: %2 = OpLabel
|
||||
; CHECK-NEXT: %112 = OpLoad %44 %85 Aligned 4
|
||||
; CHECK-NEXT: OpBranchConditional %[[#IS_GFX950]] %4 %3
|
||||
; CHECK-NEXT: %3 = OpLabel
|
||||
; CHECK-NEXT: %113 = OpFunctionCall %39 %[[#SET_FPENV_I64]] %57
|
||||
; CHECK-NEXT: OpBranch %5
|
||||
; CHECK-NEXT: %4 = OpLabel
|
||||
; CHECK-NEXT: %114 = OpFunctionCall %43 %[[#ASHR_PK_I8_I32]] %56 %56 %56
|
||||
; CHECK-NEXT: OpBranch %5
|
||||
; CHECK-NEXT: %5 = OpLabel
|
||||
; CHECK-NEXT: OpBranchConditional %[[#IS_GFX1201]] %7 %6
|
||||
; CHECK-NEXT: %6 = OpLabel
|
||||
; CHECK-NEXT: OpBranchConditional %[[#HAS_GFX12_INSTS]] %7 %8
|
||||
; CHECK-NEXT: %7 = OpLabel
|
||||
; CHECK-NEXT: %115 = OpFunctionCall %39 %[[#S_SLEEP_VAR]] %112
|
||||
; CHECK-NEXT: OpBranch %8
|
||||
; CHECK-NEXT: %8 = OpLabel
|
||||
; CHECK-NEXT: OpBranchConditional %[[#IS_GFX906]] %10 %9
|
||||
; CHECK-NEXT: %9 = OpLabel
|
||||
; CHECK-NEXT: %116 = OpFunctionCall %39 %[[#S_WAIT_EVENT_EXPORT_READY]]
|
||||
; CHECK-NEXT: OpBranch %14
|
||||
; CHECK-NEXT: %10 = OpLabel
|
||||
; CHECK-NEXT: OpBranchConditional %[[#IS_GFX1010]] %12 %11
|
||||
; CHECK-NEXT: %11 = OpLabel
|
||||
; CHECK-NEXT: OpBranchConditional %[[#IS_GFX1101]] %12 %13
|
||||
; CHECK-NEXT: %12 = OpLabel
|
||||
; CHECK-NEXT: %117 = OpFunctionCall %39 %[[#S_TTRACEDATA_IMM]] %55
|
||||
; CHECK-NEXT: OpBranch %13
|
||||
; CHECK-NEXT: %13 = OpLabel
|
||||
; CHECK-NEXT: OpBranch %14
|
||||
; CHECK-NEXT: %14 = OpLabel
|
||||
; CHECK-NEXT: OpBranch %15
|
||||
; CHECK-NEXT: %15 = OpLabel
|
||||
; CHECK-NEXT: OpBranchConditional %[[#IS_GFX1101_1]] %16 %17
|
||||
; CHECK-NEXT: %16 = OpLabel
|
||||
; CHECK-NEXT: %118 = OpLoad %44 %97 Aligned 4
|
||||
; CHECK-NEXT: %119 = OpIAdd %44 %118 %112
|
||||
; CHECK-NEXT: OpStore %97 %119 Aligned 4
|
||||
; CHECK-NEXT: OpBranch %17
|
||||
; CHECK-NEXT: %17 = OpLabel
|
||||
; CHECK-NEXT: OpBranch %18
|
||||
; CHECK-NEXT: %18 = OpLabel
|
||||
; CHECK-NEXT: %120 = OpLoad %44 %97 Aligned 4
|
||||
; CHECK-NEXT: %121 = OpISub %44 %120 %112
|
||||
; CHECK-NEXT: OpStore %97 %121 Aligned 4
|
||||
; CHECK-NEXT: OpBranch %19
|
||||
; CHECK-NEXT: %19 = OpLabel
|
||||
; CHECK-NEXT: OpBranchConditional %[[#IS_GFX1010_1]] %20 %21
|
||||
; CHECK-NEXT: %20 = OpLabel
|
||||
; CHECK-NEXT: OpBranch %21
|
||||
; CHECK-NEXT: %21 = OpLabel
|
||||
; CHECK-NEXT: %122 = OpPhi %48 %54 %19 %54 %20
|
||||
; CHECK-NEXT: OpBranchConditional %122 %18 %22
|
||||
; CHECK-NEXT: %22 = OpLabel
|
||||
; CHECK-NEXT: OpBranch %23
|
||||
; CHECK-NEXT: %23 = OpLabel
|
||||
; CHECK-NEXT: OpBranchConditional %[[#IS_GFX1201_1]] %24 %25
|
||||
; CHECK-NEXT: %24 = OpLabel
|
||||
; CHECK-NEXT: OpBranch %25
|
||||
; CHECK-NEXT: %25 = OpLabel
|
||||
; CHECK-NEXT: OpBranchConditional %[[#HAS_GFX11_INSTS]] %29 %26
|
||||
; CHECK-NEXT: %26 = OpLabel
|
||||
; CHECK-NEXT: OpBranchConditional %[[#HAS_GFX10_INSTS]] %27 %28
|
||||
; CHECK-NEXT: %27 = OpLabel
|
||||
; CHECK-NEXT: %123 = OpFunctionCall %39 %[[#S_TTRACEDATA_IMM]] %55
|
||||
; CHECK-NEXT: OpBranch %28
|
||||
; CHECK-NEXT: %28 = OpLabel
|
||||
; CHECK-NEXT: OpBranch %30
|
||||
; CHECK-NEXT: %29 = OpLabel
|
||||
; CHECK-NEXT: %124 = OpFunctionCall %39 %[[#S_WAIT_EVENT_EXPORT_READY]]
|
||||
; CHECK-NEXT: OpBranch %30
|
||||
; CHECK-NEXT: %30 = OpLabel
|
||||
; CHECK-NEXT: OpBranch %31
|
||||
; CHECK-NEXT: %31 = OpLabel
|
||||
; CHECK-NEXT: %125 = OpLoad %44 %97 Aligned 4
|
||||
; CHECK-NEXT: %126 = OpISub %44 %125 %112
|
||||
; CHECK-NEXT: OpStore %97 %126 Aligned 4
|
||||
; CHECK-NEXT: OpBranch %32
|
||||
; CHECK-NEXT: %32 = OpLabel
|
||||
; CHECK-NEXT: OpBranchConditional %[[#HAS_GFX1250_INSTS]] %33 %34
|
||||
; CHECK-NEXT: %33 = OpLabel
|
||||
; CHECK-NEXT: OpBranch %34
|
||||
; CHECK-NEXT: %34 = OpLabel
|
||||
; CHECK-NEXT: %127 = OpPhi %48 %54 %32 %54 %33
|
||||
; CHECK-NEXT: OpBranchConditional %127 %31 %35
|
||||
; CHECK-NEXT: %35 = OpLabel
|
||||
; CHECK-NEXT: OpBranch %36
|
||||
; CHECK-NEXT: %36 = OpLabel
|
||||
; CHECK-NEXT: OpBranchConditional %[[#HAS_GFX11_INSTS_1]] %37 %38
|
||||
|
||||
entry:
|
||||
%x = load i32, ptr addrspace(1) @g
|
||||
%is.gfx950. = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata !9)
|
||||
br i1 %is.gfx950., label %cond.true, label %cond.false
|
||||
cond.true:
|
||||
%0 = call addrspace(4) i16 @llvm.amdgcn.ashr.pk.i8.i32(i32 8, i32 8, i32 8)
|
||||
br label %cond.end
|
||||
cond.false:
|
||||
call addrspace(4) void @llvm.set.fpenv.i64(i64 -1)
|
||||
br label %cond.end
|
||||
cond.end:
|
||||
%is.gfx1201. = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata !10)
|
||||
br i1 %is.gfx1201., label %if.then, label %lor.lhs.false
|
||||
lor.lhs.false:
|
||||
%has.gfx12-insts. = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata !11)
|
||||
br i1 %has.gfx12-insts., label %if.then, label %if.end
|
||||
if.then:
|
||||
call addrspace(4) void @llvm.amdgcn.s.sleep.var(i32 %x)
|
||||
br label %if.end
|
||||
if.end:
|
||||
%is.gfx906. = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata !12)
|
||||
br i1 %is.gfx906., label %if.else, label %if.then2
|
||||
if.then2:
|
||||
call addrspace(4) void @llvm.amdgcn.s.wait.event.export.ready()
|
||||
br label %if.end6
|
||||
if.else:
|
||||
%is.gfx1010. = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata !13)
|
||||
br i1 %is.gfx1010., label %if.then4, label %lor.lhs.false3
|
||||
lor.lhs.false3:
|
||||
%is.gfx1101. = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata !14)
|
||||
br i1 %is.gfx1101., label %if.then4, label %if.end5
|
||||
if.then4:
|
||||
call addrspace(4) void @llvm.amdgcn.s.ttracedata.imm(i16 1)
|
||||
br label %if.end5
|
||||
if.end5:
|
||||
br label %if.end6
|
||||
if.end6:
|
||||
br label %while.cond
|
||||
while.cond:
|
||||
%is.gfx1101.7 = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata !14)
|
||||
br i1 %is.gfx1101.7, label %while.body, label %while.end
|
||||
while.body:
|
||||
%4 = load i32, ptr addrspace(1) @p
|
||||
%add = add i32 %4, %x
|
||||
store i32 %add, ptr addrspace(1) @p
|
||||
br label %while.end
|
||||
while.end:
|
||||
br label %do.body
|
||||
do.body:
|
||||
%7 = load i32, ptr addrspace(1) @p
|
||||
%sub = sub i32 %7, %x
|
||||
store i32 %sub, ptr addrspace(1) @p
|
||||
br label %do.cond
|
||||
do.cond:
|
||||
%is.gfx1010.8 = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata !13)
|
||||
br i1 %is.gfx1010.8, label %land.rhs, label %land.end
|
||||
land.rhs:
|
||||
br label %land.end
|
||||
land.end:
|
||||
%c = phi i1 [ false, %do.cond ], [ false, %land.rhs ]
|
||||
br i1 %c, label %do.body, label %do.end
|
||||
do.end:
|
||||
br label %for.cond
|
||||
for.cond:
|
||||
%is.gfx1201.9 = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata !10)
|
||||
br i1 %is.gfx1201.9, label %for.body, label %for.end
|
||||
for.body:
|
||||
br label %for.end
|
||||
for.inc:
|
||||
%9 = load i32, ptr addrspace(1) @p
|
||||
%inc = add i32 %9, 1
|
||||
store i32 %inc, ptr addrspace(1) @p
|
||||
br label %for.cond
|
||||
for.end:
|
||||
%has.gfx11-insts. = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata !18)
|
||||
br i1 %has.gfx11-insts., label %if.then10, label %if.else11
|
||||
if.then10:
|
||||
call addrspace(4) void @llvm.amdgcn.s.wait.event.export.ready()
|
||||
br label %if.end14
|
||||
if.else11:
|
||||
%has.gfx10-insts. = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata !19)
|
||||
br i1 %has.gfx10-insts., label %if.then12, label %if.end13
|
||||
if.then12:
|
||||
call addrspace(4) void @llvm.amdgcn.s.ttracedata.imm(i16 1)
|
||||
br label %if.end13
|
||||
if.end13:
|
||||
br label %if.end14
|
||||
if.end14:
|
||||
br label %do.body15
|
||||
do.body15:
|
||||
%12 = load i32, ptr addrspace(1) @p
|
||||
%sub16 = sub i32 %12, %x
|
||||
store i32 %sub16, ptr addrspace(1) @p
|
||||
br label %do.cond17
|
||||
do.cond17:
|
||||
%has.gfx1250-insts. = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata !20)
|
||||
br i1 %has.gfx1250-insts., label %land.rhs9, label %land.end10
|
||||
land.rhs9:
|
||||
br label %land.end10
|
||||
land.end10:
|
||||
%c1 = phi i1 [ false, %do.cond17 ], [ false, %land.rhs9 ]
|
||||
br i1 %c1, label %do.body15, label %do.end18
|
||||
do.end18:
|
||||
br label %for.cond19
|
||||
for.cond19:
|
||||
%has.gfx11-insts.20 = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata !18)
|
||||
br i1 %has.gfx11-insts.20, label %for.body21, label %for.end24
|
||||
for.body21:
|
||||
br label %for.end24
|
||||
for.inc22:
|
||||
%14 = load i32, ptr addrspace(1) @p
|
||||
%inc23 = add i32 %14, 1
|
||||
store i32 %inc23, ptr addrspace(1) @p
|
||||
br label %for.cond19
|
||||
for.end24:
|
||||
ret void
|
||||
}
|
||||
|
||||
!9 = !{!"is.gfx950"}
|
||||
!10 = !{!"is.gfx1201"}
|
||||
!11 = !{!"has.gfx12-insts"}
|
||||
!12 = !{!"is.gfx906"}
|
||||
!13 = !{!"is.gfx1010"}
|
||||
!14 = !{!"is.gfx1101"}
|
||||
!18 = !{!"has.gfx11-insts"}
|
||||
!19 = !{!"has.gfx10-insts"}
|
||||
!20 = !{!"has.gfx1250-insts"}
|
||||
@ -0,0 +1,153 @@
|
||||
; RUN: llc -O0 -mtriple=spirv64-amd-amdhsa %s -print-after-all -o - 2>&1 | FileCheck %s
|
||||
; RUN: llc -O3 -mtriple=spirv64-amd-amdhsa %s -print-after-all -o - 2>&1 | FileCheck %s
|
||||
|
||||
; CHECK: *** IR Dump After SPIRV prepare global variables (prepare-globals) ***
|
||||
|
||||
declare void @llvm.amdgcn.s.monitor.sleep(i16 immarg) addrspace(4)
|
||||
|
||||
declare void @llvm.amdgcn.s.sleep(i32 immarg) addrspace(4)
|
||||
|
||||
declare i1 @_Z20__spirv_SpecConstantib(i32, i1) addrspace(4)
|
||||
|
||||
declare i16 @llvm.amdgcn.ashr.pk.i8.i32(i32, i32, i32) addrspace(4) #3
|
||||
|
||||
declare void @llvm.set.fpenv.i64(i64) addrspace(4) #4
|
||||
|
||||
declare void @llvm.amdgcn.s.sleep.var(i32) addrspace(4) #5
|
||||
|
||||
declare void @llvm.amdgcn.s.wait.event.export.ready() addrspace(4) #5
|
||||
|
||||
declare void @llvm.amdgcn.s.ttracedata.imm(i16 immarg) addrspace(4) #6
|
||||
|
||||
@p = external addrspace(1) global i32
|
||||
@g = external addrspace(1) constant i32
|
||||
; CHECK: @llvm.amdgcn.feature.predicate.ids = addrspace(1) constant [137 x i8] c"is.gfx1010 4\00is.gfx950 0\00is.gfx1101 5\00has.gfx11-insts 6\00is.gfx906 3\00is.gfx1201 1\00has.gfx1250-insts 8\00has.gfx10-insts 7\00has.gfx12-insts 2\00"
|
||||
|
||||
define void @kernel() addrspace(4) {
|
||||
entry:
|
||||
%x = load i32, ptr addrspace(1) @g
|
||||
%is.gfx950. = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata !9)
|
||||
br i1 %is.gfx950., label %cond.true, label %cond.false
|
||||
cond.true:
|
||||
%0 = call addrspace(4) i16 @llvm.amdgcn.ashr.pk.i8.i32(i32 8, i32 8, i32 8)
|
||||
br label %cond.end
|
||||
cond.false:
|
||||
call addrspace(4) void @llvm.set.fpenv.i64(i64 -1)
|
||||
br label %cond.end
|
||||
cond.end:
|
||||
%is.gfx1201. = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata !10)
|
||||
br i1 %is.gfx1201., label %if.then, label %lor.lhs.false
|
||||
lor.lhs.false:
|
||||
%has.gfx12-insts. = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata !11)
|
||||
br i1 %has.gfx12-insts., label %if.then, label %if.end
|
||||
if.then:
|
||||
call addrspace(4) void @llvm.amdgcn.s.sleep.var(i32 %x)
|
||||
br label %if.end
|
||||
if.end:
|
||||
%is.gfx906. = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata !12)
|
||||
br i1 %is.gfx906., label %if.else, label %if.then2
|
||||
if.then2:
|
||||
call addrspace(4) void @llvm.amdgcn.s.wait.event.export.ready()
|
||||
br label %if.end6
|
||||
if.else:
|
||||
%is.gfx1010. = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata !13)
|
||||
br i1 %is.gfx1010., label %if.then4, label %lor.lhs.false3
|
||||
lor.lhs.false3:
|
||||
%is.gfx1101. = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata !14)
|
||||
br i1 %is.gfx1101., label %if.then4, label %if.end5
|
||||
if.then4:
|
||||
call addrspace(4) void @llvm.amdgcn.s.ttracedata.imm(i16 1)
|
||||
br label %if.end5
|
||||
if.end5:
|
||||
br label %if.end6
|
||||
if.end6:
|
||||
br label %while.cond
|
||||
while.cond:
|
||||
%is.gfx1101.7 = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata !14)
|
||||
br i1 %is.gfx1101.7, label %while.body, label %while.end
|
||||
while.body:
|
||||
%4 = load i32, ptr addrspace(1) @p
|
||||
%add = add i32 %4, %x
|
||||
store i32 %add, ptr addrspace(1) @p
|
||||
br label %while.end
|
||||
while.end:
|
||||
br label %do.body
|
||||
do.body:
|
||||
%7 = load i32, ptr addrspace(1) @p
|
||||
%sub = sub i32 %7, %x
|
||||
store i32 %sub, ptr addrspace(1) @p
|
||||
br label %do.cond
|
||||
do.cond:
|
||||
%is.gfx1010.8 = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata !13)
|
||||
br i1 %is.gfx1010.8, label %land.rhs, label %land.end
|
||||
land.rhs:
|
||||
br label %land.end
|
||||
land.end:
|
||||
%c = phi i1 [ false, %do.cond ], [ false, %land.rhs ]
|
||||
br i1 %c, label %do.body, label %do.end
|
||||
do.end:
|
||||
br label %for.cond
|
||||
for.cond:
|
||||
%is.gfx1201.9 = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata !10)
|
||||
br i1 %is.gfx1201.9, label %for.body, label %for.end
|
||||
for.body:
|
||||
br label %for.end
|
||||
for.inc:
|
||||
%9 = load i32, ptr addrspace(1) @p
|
||||
%inc = add i32 %9, 1
|
||||
store i32 %inc, ptr addrspace(1) @p
|
||||
br label %for.cond
|
||||
for.end:
|
||||
%has.gfx11-insts. = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata !18)
|
||||
br i1 %has.gfx11-insts., label %if.then10, label %if.else11
|
||||
if.then10:
|
||||
call addrspace(4) void @llvm.amdgcn.s.wait.event.export.ready()
|
||||
br label %if.end14
|
||||
if.else11:
|
||||
%has.gfx10-insts. = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata !19)
|
||||
br i1 %has.gfx10-insts., label %if.then12, label %if.end13
|
||||
if.then12:
|
||||
call addrspace(4) void @llvm.amdgcn.s.ttracedata.imm(i16 1)
|
||||
br label %if.end13
|
||||
if.end13:
|
||||
br label %if.end14
|
||||
if.end14:
|
||||
br label %do.body15
|
||||
do.body15:
|
||||
%12 = load i32, ptr addrspace(1) @p
|
||||
%sub16 = sub i32 %12, %x
|
||||
store i32 %sub16, ptr addrspace(1) @p
|
||||
br label %do.cond17
|
||||
do.cond17:
|
||||
%has.gfx1250-insts. = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata !20)
|
||||
br i1 %has.gfx1250-insts., label %land.rhs9, label %land.end10
|
||||
land.rhs9:
|
||||
br label %land.end10
|
||||
land.end10:
|
||||
%c1 = phi i1 [ false, %do.cond17 ], [ false, %land.rhs9 ]
|
||||
br i1 %c1, label %do.body15, label %do.end18
|
||||
do.end18:
|
||||
br label %for.cond19
|
||||
for.cond19:
|
||||
%has.gfx11-insts.20 = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata !18)
|
||||
br i1 %has.gfx11-insts.20, label %for.body21, label %for.end24
|
||||
for.body21:
|
||||
br label %for.end24
|
||||
for.inc22:
|
||||
%14 = load i32, ptr addrspace(1) @p
|
||||
%inc23 = add i32 %14, 1
|
||||
store i32 %inc23, ptr addrspace(1) @p
|
||||
br label %for.cond19
|
||||
for.end24:
|
||||
ret void
|
||||
}
|
||||
|
||||
!9 = !{!"is.gfx950"}
|
||||
!10 = !{!"is.gfx1201"}
|
||||
!11 = !{!"has.gfx12-insts"}
|
||||
!12 = !{!"is.gfx906"}
|
||||
!13 = !{!"is.gfx1010"}
|
||||
!14 = !{!"is.gfx1101"}
|
||||
!18 = !{!"has.gfx11-insts"}
|
||||
!19 = !{!"has.gfx10-insts"}
|
||||
!20 = !{!"has.gfx1250-insts"}
|
||||
Loading…
x
Reference in New Issue
Block a user