This change adds two builtins for AMDGPU: - `__builtin_amdgcn_processor_is`, which is similar in observable behaviour to `__builtin_cpu_is`, except that it is never "evaluated" at run time; - `__builtin_amdgcn_is_invocable`, which is behaviourally similar to `__has_builtin`, except that it is not a macro (i.e. not evaluated at preprocessing time). Neither of these is `constexpr`, even though when compiling for concrete (i.e. `gfxXXX` / `gfxXXX-generic`) targets they get evaluated in Clang, so they shouldn't tear the AST too badly / at all for multi-pass compilation cases like HIP. They can only be used in specific contexts (as args to control structures). The motivation for adding these is two-fold: - as a nice to have, it provides an AST-visible way to incorporate architecture specific code, rather than having to rely on macros and the preprocessor, which burn in the choice quite early; - as a must have, it allows featureful AMDGCN flavoured SPIR-V to be produced, where target specific capability is guarded and chosen or discarded when finalising compilation for a concrete target; this is built atop the Specialization Constant concept which is described in the SPIR-V specification under section [2.12 Specialization](https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#_specialization_2) I've tried to keep the overall footprint of the change small. The changes to Sema are a bit unpleasant, but there was a strong desire to have Clang validate these, and to constrain their uses, and this was the most compact solution I could come up with (suggestions welcome). --------- Co-authored-by: Juan Manuel Martinez Caamaño <jmartinezcaamao@gmail.com> Co-authored-by: Voicu <avoicu@amd.com>
1050 lines
42 KiB
C++
1050 lines
42 KiB
C++
//===------ SemaAMDGPU.cpp ------- AMDGPU target-specific routines --------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This file implements semantic analysis functions specific to AMDGPU.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "clang/Sema/SemaAMDGPU.h"
|
|
#include "clang/AST/Decl.h"
|
|
#include "clang/AST/DynamicRecursiveASTVisitor.h"
|
|
#include "clang/AST/Expr.h"
|
|
#include "clang/Basic/DiagnosticFrontend.h"
|
|
#include "clang/Basic/DiagnosticSema.h"
|
|
#include "clang/Basic/TargetBuiltins.h"
|
|
#include "clang/Basic/TargetInfo.h"
|
|
#include "clang/Sema/Ownership.h"
|
|
#include "clang/Sema/Scope.h"
|
|
#include "clang/Sema/Sema.h"
|
|
#include "llvm/ADT/SmallVector.h"
|
|
#include "llvm/ADT/StringExtras.h"
|
|
#include "llvm/ADT/StringMap.h"
|
|
#include "llvm/Support/AMDGPUAddrSpace.h"
|
|
#include "llvm/Support/AtomicOrdering.h"
|
|
#include "llvm/TargetParser/TargetParser.h"
|
|
#include <cstdint>
|
|
#include <utility>
|
|
|
|
namespace clang {
|
|
|
|
/// Builds the AMDGPU semantic-analysis helper; all state is forwarded to the
/// SemaBase constructor, nothing else is initialised here.
SemaAMDGPU::SemaAMDGPU(Sema &S)
    : SemaBase(S) {
}
|
|
|
|
// Performs the AMDGPU-specific semantic checks for a call to the builtin
// identified by BuiltinID.
//
// Most builtins are fully handled inside the switch and return from there.
// The atomic inc/dec builtins and the fence builtin only record the positions
// of their memory-order and scope arguments and break out of the switch; those
// two arguments are then validated by the shared code after the switch.
//
// Returns true on the paths that reject the call and false on the paths that
// accept it (builtins not listed in the switch are accepted unchanged).
bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
|
|
CallExpr *TheCall) {
|
|
// position of memory order and scope arguments in the builtin
|
|
unsigned OrderIndex, ScopeIndex;
|
|
|
|
// Feature map of the enclosing function (lambdas included); used below to
// decide which argument values are acceptable for the calling function.
const auto *FD = SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
|
|
assert(FD && "AMDGPU builtins should not be used outside of a function");
|
|
llvm::StringMap<bool> CallerFeatureMap;
|
|
getASTContext().getFunctionFeatureMap(CallerFeatureMap, FD);
|
|
bool HasGFX950Insts =
|
|
Builtin::evaluateRequiredTargetFeatures("gfx950-insts", CallerFeatureMap);
|
|
|
|
switch (BuiltinID) {
|
|
// LDS load builtins: argument 2 is the size operand. 1, 2 and 4 are always
// accepted; 12 and 16 are accepted only when the caller has gfx950-insts.
case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_load_lds:
|
|
case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_load_async_lds:
|
|
case AMDGPU::BI__builtin_amdgcn_struct_ptr_buffer_load_lds:
|
|
case AMDGPU::BI__builtin_amdgcn_struct_ptr_buffer_load_async_lds:
|
|
case AMDGPU::BI__builtin_amdgcn_load_to_lds:
|
|
case AMDGPU::BI__builtin_amdgcn_load_async_to_lds:
|
|
case AMDGPU::BI__builtin_amdgcn_global_load_lds:
|
|
case AMDGPU::BI__builtin_amdgcn_global_load_async_lds: {
|
|
constexpr const int SizeIdx = 2;
|
|
llvm::APSInt Size;
|
|
Expr *ArgExpr = TheCall->getArg(SizeIdx);
|
|
// Check for instantiation-dependent expressions (e.g., involving template
|
|
// parameters). These will be checked again during template instantiation.
|
|
if (ArgExpr->isInstantiationDependent())
|
|
return false;
|
|
[[maybe_unused]] ExprResult R =
|
|
SemaRef.VerifyIntegerConstantExpression(ArgExpr, &Size);
|
|
assert(!R.isInvalid());
|
|
switch (Size.getSExtValue()) {
|
|
case 1:
|
|
case 2:
|
|
case 4:
|
|
return false;
|
|
case 12:
|
|
case 16: {
|
|
if (HasGFX950Insts)
|
|
return false;
|
|
// Without gfx950-insts these sizes are diagnosed like any other value.
[[fallthrough]];
|
|
}
|
|
default:
|
|
SemaRef.targetDiag(ArgExpr->getExprLoc(),
|
|
diag::err_amdgcn_load_lds_size_invalid_value)
|
|
<< ArgExpr->getSourceRange();
|
|
SemaRef.targetDiag(ArgExpr->getExprLoc(),
|
|
diag::note_amdgcn_load_lds_size_valid_value)
|
|
<< HasGFX950Insts << ArgExpr->getSourceRange();
|
|
return true;
|
|
}
|
|
}
|
|
// No target-specific checks for the fpenv builtins.
case AMDGPU::BI__builtin_amdgcn_get_fpenv:
|
|
case AMDGPU::BI__builtin_amdgcn_set_fpenv:
|
|
return false;
|
|
// Atomic inc/dec and fence: only record where the memory order and scope
// arguments live; they are validated after the switch.
case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
|
|
case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
|
|
case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
|
|
case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
|
|
OrderIndex = 2;
|
|
ScopeIndex = 3;
|
|
break;
|
|
case AMDGPU::BI__builtin_amdgcn_fence:
|
|
OrderIndex = 0;
|
|
ScopeIndex = 1;
|
|
break;
|
|
// Argument 0 must be a constant in [0, UINT16_MAX].
case AMDGPU::BI__builtin_amdgcn_s_setreg:
|
|
return SemaRef.BuiltinConstantArgRange(TheCall, /*ArgNum=*/0, /*Low=*/0,
|
|
/*High=*/UINT16_MAX);
|
|
case AMDGPU::BI__builtin_amdgcn_s_wait_event: {
|
|
llvm::APSInt Result;
|
|
if (SemaRef.BuiltinConstantArg(TheCall, 0, Result))
|
|
return true;
|
|
|
|
bool IsGFX12Plus = Builtin::evaluateRequiredTargetFeatures(
|
|
"gfx12-insts", CallerFeatureMap);
|
|
|
|
// gfx11 -> gfx12 changed the interpretation of the bitmask. gfx12 inverted
|
|
// the interpretation for export_ready, but shifted the used bit by 1. Thus
|
|
// waiting for the export_ready event can use a value of 2 universally.
|
|
if (((IsGFX12Plus && !Result[1]) || (!IsGFX12Plus && Result[0])) ||
|
|
Result.getZExtValue() > 2) {
|
|
Expr *ArgExpr = TheCall->getArg(0);
|
|
SemaRef.targetDiag(ArgExpr->getExprLoc(),
|
|
diag::warn_amdgpu_s_wait_event_mask_no_effect_target)
|
|
<< ArgExpr->getSourceRange();
|
|
SemaRef.targetDiag(ArgExpr->getExprLoc(),
|
|
diag::note_amdgpu_s_wait_event_suggested_value)
|
|
<< ArgExpr->getSourceRange();
|
|
}
|
|
|
|
// The mask diagnostics above do not reject the call.
return false;
|
|
}
|
|
// DPP builtins: the two numbers are the expected total argument count and
// the number of leading data arguments passed to checkMovDPPFunctionCall.
case AMDGPU::BI__builtin_amdgcn_mov_dpp:
|
|
return checkMovDPPFunctionCall(TheCall, 5, 1);
|
|
case AMDGPU::BI__builtin_amdgcn_mov_dpp8:
|
|
return checkMovDPPFunctionCall(TheCall, 2, 1);
|
|
case AMDGPU::BI__builtin_amdgcn_update_dpp:
|
|
return checkMovDPPFunctionCall(TheCall, 6, 2);
|
|
// Scaled conversions: argument 2 must be a constant in [0, 15].
case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk8_f16_fp8:
|
|
case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk8_bf16_fp8:
|
|
case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk8_f16_bf8:
|
|
case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk8_bf16_bf8:
|
|
case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk8_f16_fp4:
|
|
case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk8_bf16_fp4:
|
|
case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk8_f32_fp8:
|
|
case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk8_f32_bf8:
|
|
case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk8_f32_fp4:
|
|
case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk16_f16_fp6:
|
|
case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk16_bf16_fp6:
|
|
case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk16_f16_bf6:
|
|
case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk16_bf16_bf6:
|
|
case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk16_f32_fp6:
|
|
case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk16_f32_bf6:
|
|
return SemaRef.BuiltinConstantArgRange(TheCall, 2, 0, 15);
|
|
case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_32x4B:
|
|
case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_16x8B:
|
|
case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_8x16B:
|
|
return checkCoopAtomicFunctionCall(TheCall, /*IsStore=*/false);
|
|
case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_32x4B:
|
|
case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_16x8B:
|
|
case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_8x16B:
|
|
return checkCoopAtomicFunctionCall(TheCall, /*IsStore=*/true);
|
|
case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b32:
|
|
case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b64:
|
|
case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b128:
|
|
case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b32:
|
|
case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b64:
|
|
case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b128:
|
|
return checkAtomicMonitorLoad(TheCall);
|
|
// Image load / sample / gather builtins: the dmask is argument 0 and the
// last two arguments must be constants.
case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f32_i32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f32_i32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f16_i32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f16_i32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f32_i32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f16_i32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_load_2darray_f32_i32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f32_i32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f16_i32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f32_i32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f16_i32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f32_i32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f16_i32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f32_i32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f16_i32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f32_i32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f16_i32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_f32_i32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f32_i32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f16_i32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_f32_i32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f32_i32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f16_i32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f32_i32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f16_i32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f32_f32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f32_f32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f16_f32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f16_f32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_sample_2d_f32_f32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f32_f32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f16_f32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_f32_f32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f32_f32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f16_f32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f32_f32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f16_f32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f32_f32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f16_f32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f32_f32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f16_f32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f32_f32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f16_f32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_f32_f32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f32_f32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f16_f32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_f32_f32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f32_f32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f16_f32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f32_f32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f16_f32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f32_f32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f16_f32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f32_f32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f16_f32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f32_f32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f16_f32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_f32_f32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f16_f32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f32_f32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_f32_f32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f32_f32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f16_f32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f32_f32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f16_f32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f32_f32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f16_f32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f32_f32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f16_f32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f32_f32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f16_f32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_f32_f32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f32_f32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f16_f32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_f32_f32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f32_f32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f16_f32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f32_f32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f16_f32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_gather4_lz_2d_v4f32_f32: {
|
|
StringRef FeatureList(
|
|
getASTContext().BuiltinInfo.getRequiredFeatures(BuiltinID));
|
|
if (!Builtin::evaluateRequiredTargetFeatures(FeatureList,
|
|
CallerFeatureMap)) {
|
|
Diag(TheCall->getBeginLoc(), diag::err_builtin_needs_feature)
|
|
<< FD->getDeclName() << FeatureList;
|
|
// NOTE(review): an error has been emitted, yet the call is reported as
// accepted (false), and the name streamed above is the enclosing
// function's, not the builtin's - confirm both are intentional.
return false;
|
|
}
|
|
|
|
// Index of the last argument of the call.
unsigned ArgCount = TheCall->getNumArgs() - 1;
|
|
llvm::APSInt Result;
|
|
|
|
// Complain about dmask values which are too huge to fully fit into 4 bits
|
|
// (which is the actual size of the dmask in corresponding HW instructions).
|
|
constexpr unsigned DMaskArgNo = 0;
|
|
constexpr int Low = 0;
|
|
constexpr int High = 15;
|
|
if (SemaRef.BuiltinConstantArg(TheCall, DMaskArgNo, Result) ||
|
|
SemaRef.BuiltinConstantArgRange(TheCall, DMaskArgNo, Low, High,
|
|
/* RangeIsError = */ true))
|
|
return true;
|
|
|
|
// Dmask indicates which elements should be returned and it is not possible
|
|
// to return more values than there are elements in return type.
|
|
int NumElementsInRetTy = 1;
|
|
const Type *RetTy = TheCall->getType().getTypePtr();
|
|
if (auto *VTy = dyn_cast<VectorType>(RetTy))
|
|
NumElementsInRetTy = VTy->getNumElements();
|
|
int NumActiveBitsInDMask =
|
|
llvm::popcount(static_cast<uint8_t>(Result.getExtValue()));
|
|
if (NumActiveBitsInDMask > NumElementsInRetTy) {
|
|
Diag(TheCall->getBeginLoc(),
|
|
diag::err_amdgcn_dmask_has_too_many_bits_set);
|
|
return true;
|
|
}
|
|
|
|
// For gather, only one bit can be set indicating which exact component to
|
|
// return.
|
|
bool ExtraGatherChecks =
|
|
BuiltinID == AMDGPU::BI__builtin_amdgcn_image_gather4_lz_2d_v4f32_f32 &&
|
|
SemaRef.BuiltinConstantArgPower2(TheCall, 0);
|
|
|
|
// Finally require the last and the second-to-last arguments to be
// constants.
return ExtraGatherChecks ||
|
|
(SemaRef.BuiltinConstantArg(TheCall, ArgCount, Result)) ||
|
|
(SemaRef.BuiltinConstantArg(TheCall, (ArgCount - 1), Result));
|
|
}
|
|
// Image store builtins: the dmask is argument 1 and the last two arguments
// must be constants.
case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f32_i32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f32_i32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f16_i32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f16_i32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_store_2d_f32_i32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f32_i32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f16_i32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_store_2darray_f32_i32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f32_i32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f16_i32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f32_i32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f16_i32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f32_i32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f16_i32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f32_i32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f16_i32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f32_i32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f16_i32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_f32_i32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f32_i32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f16_i32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_f32_i32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f32_i32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f16_i32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f32_i32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f16_i32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f32_i32:
|
|
case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f16_i32: {
|
|
StringRef FeatureList(
|
|
getASTContext().BuiltinInfo.getRequiredFeatures(BuiltinID));
|
|
if (!Builtin::evaluateRequiredTargetFeatures(FeatureList,
|
|
CallerFeatureMap)) {
|
|
Diag(TheCall->getBeginLoc(), diag::err_builtin_needs_feature)
|
|
<< FD->getDeclName() << FeatureList;
|
|
// NOTE(review): same pattern as the image load case - an error is emitted
// but false is returned; confirm this is intentional.
return false;
|
|
}
|
|
|
|
// Index of the last argument of the call.
unsigned ArgCount = TheCall->getNumArgs() - 1;
|
|
llvm::APSInt Result;
|
|
|
|
// Complain about dmask values which are too huge to fully fit into 4 bits
|
|
// (which is the actual size of the dmask in corresponding HW instructions).
|
|
constexpr unsigned DMaskArgNo = 1;
|
|
return SemaRef.BuiltinConstantArgRange(TheCall, DMaskArgNo, /*Low=*/0,
|
|
/*High=*/15,
|
|
/*RangeIsError=*/true) ||
|
|
SemaRef.BuiltinConstantArg(TheCall, ArgCount, Result) ||
|
|
SemaRef.BuiltinConstantArg(TheCall, (ArgCount - 1), Result);
|
|
}
|
|
// Integer WMMA / SWMMAC builtins accept one optional trailing argument: the
// shorter argument count needs no further checking, the longer one has its
// last (clamp) argument validated below.
case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8:
|
|
case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x128_iu8: {
|
|
if (BuiltinID == AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8) {
|
|
if (SemaRef.checkArgCountRange(TheCall, 7, 8))
|
|
return true;
|
|
if (TheCall->getNumArgs() == 7)
|
|
return false;
|
|
} else if (BuiltinID ==
|
|
AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x128_iu8) {
|
|
if (SemaRef.checkArgCountRange(TheCall, 8, 9))
|
|
return true;
|
|
if (TheCall->getNumArgs() == 8)
|
|
return false;
|
|
}
|
|
// Check if the last argument (clamp operand) is a constant and is
|
|
// convertible to bool.
|
|
Expr *ClampArg = TheCall->getArg(TheCall->getNumArgs() - 1);
|
|
// 1) Ensure clamp argument is a constant expression
|
|
llvm::APSInt ClampValue;
|
|
if (!SemaRef.VerifyIntegerConstantExpression(ClampArg, &ClampValue)
|
|
.isUsable())
|
|
return true;
|
|
// 2) Check if the argument can be converted to bool type
|
|
if (!SemaRef.Context.hasSameType(ClampArg->getType(),
|
|
SemaRef.Context.BoolTy)) {
|
|
// Try to convert to bool
|
|
QualType BoolTy = SemaRef.Context.BoolTy;
|
|
ExprResult ClampExpr(ClampArg);
|
|
SemaRef.CheckSingleAssignmentConstraints(BoolTy, ClampExpr);
|
|
if (ClampExpr.isInvalid())
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
// For these WMMA builtins arguments 0 and 2 must both be the constant 0.
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_bf16:
|
|
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x4_f32:
|
|
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_f16:
|
|
case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x32_f16:
|
|
case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x32_bf16:
|
|
case AMDGPU::BI__builtin_amdgcn_wmma_bf16f32_16x16x32_bf16:
|
|
return SemaRef.BuiltinConstantArgRange(TheCall, /*ArgNum=*/0, /*Low=*/0,
|
|
/*High=*/0) ||
|
|
SemaRef.BuiltinConstantArgRange(TheCall, /*ArgNum=*/2, /*Low=*/0,
|
|
/*High=*/0);
|
|
default:
|
|
return false;
|
|
}
|
|
|
|
// Only the atomic inc/dec and fence builtins reach this point; OrderIndex
// and ScopeIndex were set by their cases above.
ExprResult Arg = TheCall->getArg(OrderIndex);
|
|
auto ArgExpr = Arg.get();
|
|
Expr::EvalResult ArgResult;
|
|
|
|
if (!ArgExpr->EvaluateAsInt(ArgResult, getASTContext()))
|
|
return Diag(ArgExpr->getExprLoc(), diag::err_typecheck_expect_int)
|
|
<< ArgExpr->getType();
|
|
auto Ord = ArgResult.Val.getInt().getZExtValue();
|
|
|
|
// Check validity of memory ordering as per C11 / C++11's memory model.
|
|
// Only fence needs check. Atomic dec/inc allow all memory orders.
|
|
if (!llvm::isValidAtomicOrderingCABI(Ord))
|
|
return Diag(ArgExpr->getBeginLoc(),
|
|
diag::warn_atomic_op_has_invalid_memory_order)
|
|
<< 0 << ArgExpr->getSourceRange();
|
|
switch (static_cast<llvm::AtomicOrderingCABI>(Ord)) {
|
|
case llvm::AtomicOrderingCABI::relaxed:
|
|
case llvm::AtomicOrderingCABI::consume:
|
|
// relaxed and consume are diagnosed for the fence builtin only.
if (BuiltinID == AMDGPU::BI__builtin_amdgcn_fence)
|
|
return Diag(ArgExpr->getBeginLoc(),
|
|
diag::warn_atomic_op_has_invalid_memory_order)
|
|
<< 0 << ArgExpr->getSourceRange();
|
|
break;
|
|
case llvm::AtomicOrderingCABI::acquire:
|
|
case llvm::AtomicOrderingCABI::release:
|
|
case llvm::AtomicOrderingCABI::acq_rel:
|
|
case llvm::AtomicOrderingCABI::seq_cst:
|
|
break;
|
|
}
|
|
|
|
Arg = TheCall->getArg(ScopeIndex);
|
|
ArgExpr = Arg.get();
|
|
Expr::EvalResult ArgResult1;
|
|
// Check that sync scope is a constant literal
|
|
if (!ArgExpr->EvaluateAsConstantExpr(ArgResult1, getASTContext()))
|
|
return Diag(ArgExpr->getExprLoc(), diag::err_expr_not_string_literal)
|
|
<< ArgExpr->getType();
|
|
|
|
return false;
|
|
}
|
|
|
|
/// Validates the atomic-ordering operand \p E of an AMDGPU atomic builtin.
///
/// \p E must evaluate to an integer; failure to do so is treated as
/// unreachable. The evaluated value is rejected when it is not a valid C ABI
/// ordering, when it is acq_rel and the operation does not both load and
/// store, when it is acquire and the operation does not load, or when it is
/// release and the operation does not store.
///
/// \returns false when the ordering is accepted; otherwise the result of the
/// warn_atomic_op_has_invalid_memory_order diagnostic converted to bool
/// (callers in this file treat that as failure).
bool SemaAMDGPU::checkAtomicOrderingCABIArg(Expr *E, bool MayLoad,
                                            bool MayStore) {
  using Ordering = llvm::AtomicOrderingCABI;

  Expr::EvalResult Evaluated;
  if (!E->EvaluateAsInt(Evaluated, getASTContext()))
    llvm_unreachable("Intrinsic requires imm for atomic ordering argument!");

  const Ordering Ord = Ordering(Evaluated.Val.getInt().getZExtValue());

  // Each flag names one independent reason for rejecting the ordering.
  const bool UnknownOrdering =
      !llvm::isValidAtomicOrderingCABI(static_cast<unsigned>(Ord));
  const bool AcqRelWithoutBoth =
      Ord == Ordering::acq_rel && !(MayLoad && MayStore);
  const bool AcquireWithoutLoad = Ord == Ordering::acquire && !MayLoad;
  const bool ReleaseWithoutStore = Ord == Ordering::release && !MayStore;

  const bool Acceptable = !UnknownOrdering && !AcqRelWithoutBoth &&
                          !AcquireWithoutLoad && !ReleaseWithoutStore;
  if (Acceptable)
    return false;

  return Diag(E->getBeginLoc(), diag::warn_atomic_op_has_invalid_memory_order)
         << 0 << E->getSourceRange();
}
|
|
|
|
/// Validates a call to one of the cooperative atomic load/store builtins.
///
/// Checks, in order:
///  1. argument 0 must point into the flat or the global address space;
///  2. the atomic ordering (argument 2 for stores, argument 1 for loads) must
///     be accepted by checkAtomicOrderingCABIArg for a store-only or
///     load-only operation respectively;
///  3. the last argument (the syncscope) must be a string literal.
///
/// Checks 2 and 3 are skipped while either operand is value-dependent.
///
/// \param TheCall the builtin call being checked.
/// \param IsStore true for the store builtins, false for the load builtins.
/// \returns true if any check failed, false otherwise.
bool SemaAMDGPU::checkCoopAtomicFunctionCall(CallExpr *TheCall, bool IsStore) {
  bool Fail = false;

  // First argument is a global or generic pointer.
  Expr *PtrArg = TheCall->getArg(0);
  QualType PtrTy = PtrArg->getType()->getPointeeType();
  unsigned AS = getASTContext().getTargetAddressSpace(PtrTy.getAddressSpace());
  if (AS != llvm::AMDGPUAS::FLAT_ADDRESS &&
      AS != llvm::AMDGPUAS::GLOBAL_ADDRESS) {
    Fail = true;
    Diag(TheCall->getBeginLoc(), diag::err_amdgcn_coop_atomic_invalid_as)
        << PtrArg->getSourceRange();
  }

  Expr *AO = TheCall->getArg(IsStore ? 2 : 1);
  Expr *Scope = TheCall->getArg(TheCall->getNumArgs() - 1);

  // Value-dependent operands cannot be evaluated yet, so the remaining checks
  // are skipped (presumably they run again once the call is instantiated).
  // Return Fail rather than false so that an address-space error already
  // diagnosed above is not silently reported as success.
  if (AO->isValueDependent() || Scope->isValueDependent())
    return Fail;

  // Check atomic ordering
  Fail |= checkAtomicOrderingCABIArg(AO, /*MayLoad=*/!IsStore,
                                     /*MayStore=*/IsStore);

  // Last argument is the syncscope as a string literal.
  if (!isa<StringLiteral>(Scope->IgnoreParenImpCasts())) {
    Diag(TheCall->getBeginLoc(), diag::err_expr_not_string_literal)
        << Scope->getSourceRange();
    Fail = true;
  }

  return Fail;
}
|
|
|
|
/// Validates a call to one of the flat/global "load monitor" builtins.
///
/// Argument 1 is the atomic ordering and is checked as a load-only operation.
/// The last argument is the scope: when it is an integer constant expression
/// it must be valid for the generic atomic scope model. Nothing is checked
/// while either operand is value-dependent.
///
/// \returns true if any check failed, false otherwise.
bool SemaAMDGPU::checkAtomicMonitorLoad(CallExpr *TheCall) {
  Expr *OrderArg = TheCall->getArg(1);
  Expr *ScopeArg = TheCall->getArg(TheCall->getNumArgs() - 1);

  const bool Dependent =
      OrderArg->isValueDependent() || ScopeArg->isValueDependent();
  if (Dependent)
    return false;

  bool Invalid = false;
  Invalid |= checkAtomicOrderingCABIArg(OrderArg, /*MayLoad=*/true,
                                        /*MayStore=*/false);

  auto GenericModel = AtomicScopeModel::create(AtomicScopeModelKind::Generic);
  std::optional<llvm::APSInt> ScopeValue =
      ScopeArg->getIntegerConstantExpr(SemaRef.Context);

  // A scope that is not an integer constant expression is left undiagnosed.
  if (ScopeValue && !GenericModel->isValid(ScopeValue->getZExtValue())) {
    Diag(ScopeArg->getBeginLoc(), diag::err_atomic_op_has_invalid_sync_scope)
        << ScopeArg->getSourceRange();
    Invalid = true;
  }

  return Invalid;
}
|
|
|
|
/// Validates a call to one of the DPP builtins.
///
/// \param TheCall     the builtin call being checked.
/// \param NumArgs     exact number of arguments the call must have.
/// \param NumDataArgs number of leading data arguments (at most 2). Each must
///                    have an arithmetic, non-complex type. When there are
///                    two, they must have the same unqualified type, or be a
///                    signed/unsigned integer pair of equal size.
/// \returns true if the call was diagnosed, false otherwise.
bool SemaAMDGPU::checkMovDPPFunctionCall(CallExpr *TheCall, unsigned NumArgs,
                                         unsigned NumDataArgs) {
  assert(NumDataArgs <= 2);
  if (SemaRef.checkArgCountRange(TheCall, NumArgs, NumArgs))
    return true;

  Expr *DataArgs[2];
  QualType DataTys[2];
  for (unsigned Idx = 0; Idx < NumDataArgs; ++Idx) {
    Expr *Operand = TheCall->getArg(Idx);
    QualType OperandTy = Operand->getType();
    DataArgs[Idx] = Operand;
    DataTys[Idx] = OperandTy;

    // TODO: Vectors can also be supported.
    const bool IsRealArithmetic =
        OperandTy->isArithmeticType() && !OperandTy->isAnyComplexType();
    if (IsRealArithmetic)
      continue;

    SemaRef.Diag(Operand->getBeginLoc(),
                 diag::err_typecheck_cond_expect_int_float)
        << OperandTy << Operand->getSourceRange();
    return true;
  }

  // With a single data argument there is nothing left to compare.
  if (NumDataArgs < 2)
    return false;

  QualType FirstTy = DataTys[0];
  QualType SecondTy = DataTys[1];

  if (getASTContext().hasSameUnqualifiedType(FirstTy, SecondTy))
    return false;

  // Integers that differ only in signedness are tolerated when they have the
  // same size.
  const bool OppositeSignedness =
      (FirstTy->isUnsignedIntegerType() && SecondTy->isSignedIntegerType()) ||
      (FirstTy->isSignedIntegerType() && SecondTy->isUnsignedIntegerType());
  if (OppositeSignedness && getASTContext().getTypeSize(FirstTy) ==
                                getASTContext().getTypeSize(SecondTy))
    return false;

  SemaRef.Diag(DataArgs[1]->getBeginLoc(),
               diag::err_typecheck_call_different_arg_types)
      << FirstTy << SecondTy;
  return true;
}
|
|
|
|
static bool
|
|
checkAMDGPUFlatWorkGroupSizeArguments(Sema &S, Expr *MinExpr, Expr *MaxExpr,
|
|
const AMDGPUFlatWorkGroupSizeAttr &Attr) {
|
|
// Accept template arguments for now as they depend on something else.
|
|
// We'll get to check them when they eventually get instantiated.
|
|
if (MinExpr->isValueDependent() || MaxExpr->isValueDependent())
|
|
return false;
|
|
|
|
uint32_t Min = 0;
|
|
if (!S.checkUInt32Argument(Attr, MinExpr, Min, 0))
|
|
return true;
|
|
|
|
uint32_t Max = 0;
|
|
if (!S.checkUInt32Argument(Attr, MaxExpr, Max, 1))
|
|
return true;
|
|
|
|
if (Min == 0 && Max != 0) {
|
|
S.Diag(Attr.getLocation(), diag::err_attribute_argument_invalid)
|
|
<< &Attr << 0;
|
|
return true;
|
|
}
|
|
if (Min > Max) {
|
|
S.Diag(Attr.getLocation(), diag::err_attribute_argument_invalid)
|
|
<< &Attr << 1;
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
/// Creates a flat-work-group-size attribute for (\p MinExpr, \p MaxExpr).
///
/// \returns the newly allocated attribute, or nullptr when
/// checkAMDGPUFlatWorkGroupSizeArguments rejects the arguments.
AMDGPUFlatWorkGroupSizeAttr *
SemaAMDGPU::CreateAMDGPUFlatWorkGroupSizeAttr(const AttributeCommonInfo &CI,
                                              Expr *MinExpr, Expr *MaxExpr) {
  ASTContext &Ctx = getASTContext();

  // Stack-allocated stand-in handed to the argument checker, which needs an
  // attribute object for its diagnostics.
  AMDGPUFlatWorkGroupSizeAttr Probe(Ctx, CI, MinExpr, MaxExpr);
  const bool Rejected =
      checkAMDGPUFlatWorkGroupSizeArguments(SemaRef, MinExpr, MaxExpr, Probe);

  return Rejected ? nullptr
                  : ::new (Ctx) AMDGPUFlatWorkGroupSizeAttr(Ctx, CI, MinExpr,
                                                            MaxExpr);
}
|
|
|
|
/// Attaches a flat-work-group-size attribute to \p D, provided
/// CreateAMDGPUFlatWorkGroupSizeAttr produces one for the given arguments.
void SemaAMDGPU::addAMDGPUFlatWorkGroupSizeAttr(Decl *D,
                                                const AttributeCommonInfo &CI,
                                                Expr *MinExpr, Expr *MaxExpr) {
  AMDGPUFlatWorkGroupSizeAttr *Created =
      CreateAMDGPUFlatWorkGroupSizeAttr(CI, MinExpr, MaxExpr);
  if (!Created)
    return;
  D->addAttr(Created);
}
|
|
|
|
/// Handles a parsed flat-work-group-size attribute: arguments 0 and 1 of
/// \p AL are forwarded as the minimum and maximum expressions.
void SemaAMDGPU::handleAMDGPUFlatWorkGroupSizeAttr(Decl *D,
                                                   const ParsedAttr &AL) {
  Expr *LowerBound = AL.getArgAsExpr(0);
  Expr *UpperBound = AL.getArgAsExpr(1);
  addAMDGPUFlatWorkGroupSizeAttr(D, AL, LowerBound, UpperBound);
}
|
|
|
|
/// Checks the (min[, max]) arguments of the waves-per-EU attribute.
///
/// \p MaxExpr may be null. Unexpanded parameter packs are diagnosed first;
/// value-dependent arguments are then accepted as-is. Otherwise each present
/// argument must pass Sema::checkUInt32Argument, a zero minimum requires a
/// zero (or absent) maximum, and a non-zero maximum must not be below the
/// minimum.
///
/// \returns true if the arguments are invalid, false otherwise.
static bool checkAMDGPUWavesPerEUArguments(Sema &S, Expr *MinExpr,
                                           Expr *MaxExpr,
                                           const AMDGPUWavesPerEUAttr &Attr) {
  if (S.DiagnoseUnexpandedParameterPack(MinExpr))
    return true;
  if (MaxExpr && S.DiagnoseUnexpandedParameterPack(MaxExpr))
    return true;

  // Template-dependent arguments are validated when they get instantiated.
  if (MinExpr->isValueDependent())
    return false;
  if (MaxExpr && MaxExpr->isValueDependent())
    return false;

  uint32_t Lower = 0;
  if (!S.checkUInt32Argument(Attr, MinExpr, Lower, 0))
    return true;

  // An omitted maximum keeps the value 0.
  uint32_t Upper = 0;
  if (MaxExpr && !S.checkUInt32Argument(Attr, MaxExpr, Upper, 1))
    return true;

  // Reports the argument at ArgIdx as invalid and signals failure.
  auto RejectArgument = [&S, &Attr](int ArgIdx) {
    S.Diag(Attr.getLocation(), diag::err_attribute_argument_invalid)
        << &Attr << ArgIdx;
    return true;
  };

  if (Upper != 0) {
    if (Lower == 0)
      return RejectArgument(0);
    if (Lower > Upper)
      return RejectArgument(1);
  }

  return false;
}
|
|
|
|
/// Creates a waves-per-EU attribute for (\p MinExpr, \p MaxExpr).
///
/// \returns the newly allocated attribute, or nullptr when
/// checkAMDGPUWavesPerEUArguments rejects the arguments.
AMDGPUWavesPerEUAttr *
SemaAMDGPU::CreateAMDGPUWavesPerEUAttr(const AttributeCommonInfo &CI,
                                       Expr *MinExpr, Expr *MaxExpr) {
  ASTContext &Ctx = getASTContext();

  // Stack-allocated stand-in handed to the argument checker, which needs an
  // attribute object for its diagnostics.
  AMDGPUWavesPerEUAttr Probe(Ctx, CI, MinExpr, MaxExpr);
  const bool Rejected =
      checkAMDGPUWavesPerEUArguments(SemaRef, MinExpr, MaxExpr, Probe);

  return Rejected ? nullptr
                  : ::new (Ctx) AMDGPUWavesPerEUAttr(Ctx, CI, MinExpr, MaxExpr);
}
|
|
|
|
/// Attaches a waves-per-EU attribute to \p D, provided
/// CreateAMDGPUWavesPerEUAttr produces one for the given arguments.
void SemaAMDGPU::addAMDGPUWavesPerEUAttr(Decl *D, const AttributeCommonInfo &CI,
                                         Expr *MinExpr, Expr *MaxExpr) {
  AMDGPUWavesPerEUAttr *Created =
      CreateAMDGPUWavesPerEUAttr(CI, MinExpr, MaxExpr);
  if (!Created)
    return;
  D->addAttr(Created);
}
|
|
|
|
/// Handles a parsed waves-per-EU attribute, which takes one or two
/// arguments: the minimum and, optionally, the maximum.
void SemaAMDGPU::handleAMDGPUWavesPerEUAttr(Decl *D, const ParsedAttr &AL) {
  if (!AL.checkAtLeastNumArgs(SemaRef, 1))
    return;
  if (!AL.checkAtMostNumArgs(SemaRef, 2))
    return;

  Expr *LowerBound = AL.getArgAsExpr(0);

  // The maximum is optional; a null expression stands for "not given".
  Expr *UpperBound = nullptr;
  if (AL.getNumArgs() > 1)
    UpperBound = AL.getArgAsExpr(1);

  addAMDGPUWavesPerEUAttr(D, AL, LowerBound, UpperBound);
}
|
|
|
|
/// Handles a parsed num-SGPR attribute: argument 0 must pass
/// Sema::checkUInt32Argument, after which the attribute is added to \p D.
void SemaAMDGPU::handleAMDGPUNumSGPRAttr(Decl *D, const ParsedAttr &AL) {
  uint32_t Count = 0;
  Expr *CountExpr = AL.getArgAsExpr(0);

  const bool ArgOk = SemaRef.checkUInt32Argument(AL, CountExpr, Count);
  if (ArgOk) {
    ASTContext &Ctx = getASTContext();
    D->addAttr(::new (Ctx) AMDGPUNumSGPRAttr(Ctx, AL, Count));
  }
}
|
|
|
|
/// Handles a parsed num-VGPR attribute: argument 0 must pass
/// Sema::checkUInt32Argument, after which the attribute is added to \p D.
void SemaAMDGPU::handleAMDGPUNumVGPRAttr(Decl *D, const ParsedAttr &AL) {
  uint32_t Count = 0;
  Expr *CountExpr = AL.getArgAsExpr(0);

  const bool ArgOk = SemaRef.checkUInt32Argument(AL, CountExpr, Count);
  if (ArgOk) {
    ASTContext &Ctx = getASTContext();
    D->addAttr(::new (Ctx) AMDGPUNumVGPRAttr(Ctx, AL, Count));
  }
}
|
|
|
|
static bool
|
|
checkAMDGPUMaxNumWorkGroupsArguments(Sema &S, Expr *XExpr, Expr *YExpr,
|
|
Expr *ZExpr,
|
|
const AMDGPUMaxNumWorkGroupsAttr &Attr) {
|
|
if (S.DiagnoseUnexpandedParameterPack(XExpr) ||
|
|
(YExpr && S.DiagnoseUnexpandedParameterPack(YExpr)) ||
|
|
(ZExpr && S.DiagnoseUnexpandedParameterPack(ZExpr)))
|
|
return true;
|
|
|
|
// Accept template arguments for now as they depend on something else.
|
|
// We'll get to check them when they eventually get instantiated.
|
|
if (XExpr->isValueDependent() || (YExpr && YExpr->isValueDependent()) ||
|
|
(ZExpr && ZExpr->isValueDependent()))
|
|
return false;
|
|
|
|
uint32_t NumWG = 0;
|
|
Expr *Exprs[3] = {XExpr, YExpr, ZExpr};
|
|
for (int i = 0; i < 3; i++) {
|
|
if (Exprs[i]) {
|
|
if (!S.checkUInt32Argument(Attr, Exprs[i], NumWG, i,
|
|
/*StrictlyUnsigned=*/true))
|
|
return true;
|
|
if (NumWG == 0) {
|
|
S.Diag(Attr.getLoc(), diag::err_attribute_argument_is_zero)
|
|
<< &Attr << Exprs[i]->getSourceRange();
|
|
return true;
|
|
}
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
/// Creates a max-num-work-groups attribute for (\p XExpr, \p YExpr, \p ZExpr).
///
/// Must not be called in a SFINAE context (asserted), because diagnostics
/// would point at a temporary attribute.
///
/// \returns the newly allocated attribute, or nullptr when
/// checkAMDGPUMaxNumWorkGroupsArguments rejects the arguments.
AMDGPUMaxNumWorkGroupsAttr *SemaAMDGPU::CreateAMDGPUMaxNumWorkGroupsAttr(
    const AttributeCommonInfo &CI, Expr *XExpr, Expr *YExpr, Expr *ZExpr) {
  ASTContext &Ctx = getASTContext();

  // Stack-allocated stand-in handed to the argument checker, which needs an
  // attribute object for its diagnostics.
  AMDGPUMaxNumWorkGroupsAttr Probe(Ctx, CI, XExpr, YExpr, ZExpr);
  assert(!SemaRef.isSFINAEContext() &&
         "Can't produce SFINAE diagnostic pointing to temporary attribute");

  const bool Rejected = checkAMDGPUMaxNumWorkGroupsArguments(
      SemaRef, XExpr, YExpr, ZExpr, Probe);

  return Rejected ? nullptr
                  : ::new (Ctx) AMDGPUMaxNumWorkGroupsAttr(Ctx, CI, XExpr,
                                                           YExpr, ZExpr);
}
|
|
|
|
/// Attach an AMDGPUMaxNumWorkGroupsAttr built from \p XExpr, \p YExpr and
/// \p ZExpr to \p D. Nothing is attached when
/// CreateAMDGPUMaxNumWorkGroupsAttr returns nullptr.
void SemaAMDGPU::addAMDGPUMaxNumWorkGroupsAttr(Decl *D,
                                               const AttributeCommonInfo &CI,
                                               Expr *XExpr, Expr *YExpr,
                                               Expr *ZExpr) {
  AMDGPUMaxNumWorkGroupsAttr *NewAttr =
      CreateAMDGPUMaxNumWorkGroupsAttr(CI, XExpr, YExpr, ZExpr);
  if (!NewAttr)
    return;
  D->addAttr(NewAttr);
}
|
|
|
|
/// Process the AMDGPU MaxNumWorkGroups attribute \p AL written on \p D.
///
/// Argument 0 is always forwarded as the X expression; arguments 1 and 2 are
/// forwarded as Y and Z when present and as nullptr otherwise.
void SemaAMDGPU::handleAMDGPUMaxNumWorkGroupsAttr(Decl *D,
                                                  const ParsedAttr &AL) {
  const unsigned NumArgs = AL.getNumArgs();

  Expr *YExpr = nullptr;
  if (NumArgs > 1)
    YExpr = AL.getArgAsExpr(1);

  Expr *ZExpr = nullptr;
  if (NumArgs > 2)
    ZExpr = AL.getArgAsExpr(2);

  Expr *XExpr = AL.getArgAsExpr(0);
  addAMDGPUMaxNumWorkGroupsAttr(D, AL, XExpr, YExpr, ZExpr);
}
|
|
|
|
/// Expand a call to one of the AMDGPU predicate builtins into the expression
/// that stands in for it.
///
/// \p E, with parentheses ignored, must be a CallExpr (enforced by cast<>).
/// The possible outcomes are:
///  - the call has no builtin callee: a "false" literal is produced;
///  - __builtin_amdgcn_processor_is whose argument is not a string literal, or
///    names a processor that neither the target nor the aux target accepts:
///    a diagnostic is issued and nullptr is returned;
///  - any other builtin whose first argument is not of builtin-function type:
///    a diagnostic is issued and nullptr is returned;
///  - the target triple is SPIR-V: the call itself is kept, with its type set
///    to the logical-operation type;
///  - otherwise: a literal is produced, computed from the target ID (processor
///    check) or from the feature map of the referenced builtin (invocability
///    check).
/// Every non-null result is recorded in ExpandedPredicates.
Expr *SemaAMDGPU::ExpandAMDGPUPredicateBuiltIn(Expr *E) {
  ASTContext &Ctx = getASTContext();
  CallExpr *Call = cast<CallExpr>(E->IgnoreParens());
  QualType BoolTy = Ctx.getLogicalOperationType();
  SourceLocation Loc = Call->getExprLoc();

  // Records Pred as an expanded predicate and hands back the stored entry.
  auto Track = [this](Expr *Pred) -> Expr * {
    return *ExpandedPredicates.insert(Pred).first;
  };

  // Materialises Value as an integer literal of the logical-operation type.
  // NOTE(review): "true" is an all-ones pattern of that type's width, i.e. all
  // bits set rather than 1 if the width exceeds one bit - confirm intended.
  auto MakeLiteral = [&](bool Value) -> Expr * {
    unsigned Width = Ctx.getIntWidth(BoolTy);
    llvm::APInt Bits = Value ? llvm::APInt::getAllOnes(Width)
                             : llvm::APInt::getZero(Width);
    return IntegerLiteral::Create(Ctx, Bits, BoolTy, Loc);
  };

  // On SPIR-V the call is retained rather than folded to a literal.
  auto KeepCall = [&]() -> Expr * {
    Call->setType(BoolTy);
    return Track(Call);
  };

  unsigned BuiltinID = Call->getBuiltinCallee();
  if (!BuiltinID)
    return Track(MakeLiteral(false));
  if (Ctx.BuiltinInfo.isAuxBuiltinID(BuiltinID))
    BuiltinID = Ctx.BuiltinInfo.getAuxBuiltinID(BuiltinID);

  const TargetInfo &TI = Ctx.getTargetInfo();
  const bool IsSPIRV = TI.getTriple().isSPIRV();

  if (BuiltinID == AMDGPU::BI__builtin_amdgcn_processor_is) {
    auto *Name = dyn_cast<StringLiteral>(Call->getArg(0)->IgnoreParenCasts());
    if (!Name) {
      Diag(Loc, diag::err_amdgcn_processor_is_arg_not_literal);
      return nullptr;
    }

    StringRef Proc = Name->getString();
    const TargetInfo *AuxTI = Ctx.getAuxTargetInfo();
    const bool KnownProcessor =
        TI.isValidCPUName(Proc) || (AuxTI && AuxTI->isValidCPUName(Proc));
    if (!KnownProcessor) {
      Diag(Loc, diag::err_amdgcn_processor_is_arg_invalid_value) << Proc;

      // List the accepted names, taken from whichever of the two targets is
      // the AMD one.
      SmallVector<StringRef, 32> Candidates;
      if (TI.getTriple().getVendor() == llvm::Triple::VendorType::AMD)
        TI.fillValidCPUList(Candidates);
      else if (AuxTI) // Since the BI is present it must be an AMDGPU triple.
        AuxTI->fillValidCPUList(Candidates);
      if (!Candidates.empty())
        Diag(Loc, diag::note_amdgcn_processor_is_valid_options)
            << llvm::join(Candidates, ", ");
      return nullptr;
    }

    if (IsSPIRV)
      return KeepCall();

    // True only when the target ID begins with the requested processor name;
    // without a target ID the predicate folds to false.
    bool Matches = false;
    if (auto TID = TI.getTargetID())
      Matches = TID->find(Proc) == 0;
    return Track(MakeLiteral(Matches));
  }

  // Anything else is handled as an invocability query on a builtin.
  Expr *Arg = Call->getArg(0);
  if (!Arg || Arg->getType() != Ctx.BuiltinFnTy) {
    Diag(Loc, diag::err_amdgcn_is_invocable_arg_invalid_value) << Arg;
    return nullptr;
  }

  if (IsSPIRV)
    return KeepCall();

  auto *Callee = cast<FunctionDecl>(Arg->getReferencedDeclOfCallee());

  StringRef Required =
      Ctx.BuiltinInfo.getRequiredFeatures(Callee->getBuiltinID());
  llvm::StringMap<bool> CalleeFeatures;
  Ctx.getFunctionFeatureMap(CalleeFeatures, Callee);

  return Track(MakeLiteral(
      Builtin::evaluateRequiredTargetFeatures(Required, CalleeFeatures)));
}
|
|
|
|
/// \returns true when \p E is present in ExpandedPredicates, the set that
/// ExpandAMDGPUPredicateBuiltIn records its results in.
bool SemaAMDGPU::IsPredicate(Expr *E) const {
  return ExpandedPredicates.count(E) != 0;
}
|
|
|
|
/// Record \p FD in PotentiallyUnguardedBuiltinUsers so that
/// HasPotentiallyUnguardedBuiltinUsage reports it later.
void SemaAMDGPU::AddPotentiallyUnguardedBuiltinUser(FunctionDecl *FD) {
  // The insertion result (e.g. whether FD was already present) is not needed.
  (void)PotentiallyUnguardedBuiltinUsers.insert(FD);
}
|
|
|
|
/// \returns true when \p FD was previously registered through
/// AddPotentiallyUnguardedBuiltinUser.
bool SemaAMDGPU::HasPotentiallyUnguardedBuiltinUsage(FunctionDecl *FD) const {
  return PotentiallyUnguardedBuiltinUsers.count(FD) != 0;
}
|
|
|
|
namespace {
|
|
/// This class implements -Wamdgpu-unguarded-builtin-usage.
|
|
///
|
|
/// This is done with a traversal of the AST of a function that includes a
|
|
/// call to a target specific builtin. Whenever we encounter an \c if of the
|
|
/// form: \c if(__builtin_amdgcn_is_invocable), we consider the then statement
|
|
/// guarded.
|
|
class DiagnoseUnguardedBuiltins : public DynamicRecursiveASTVisitor {
|
|
// TODO: this could eventually be extended to consider what happens when there
|
|
// are multiple target architectures specified via target("arch=gfxXXX")
|
|
// target("arch=gfxyyy") etc., as well as feature disabling via "-XXX".
|
|
Sema &SemaRef;
|
|
|
|
SmallVector<StringRef> TargetFeatures;
|
|
SmallVector<std::pair<SourceLocation, StringRef>> CurrentGFXIP;
|
|
SmallVector<unsigned> GuardedBuiltins;
|
|
|
|
static Expr *FindPredicate(Expr *Cond) {
|
|
if (auto *CE = dyn_cast<CallExpr>(Cond)) {
|
|
if (CE->getBuiltinCallee() == AMDGPU::BI__builtin_amdgcn_is_invocable ||
|
|
CE->getBuiltinCallee() == AMDGPU::BI__builtin_amdgcn_processor_is)
|
|
return Cond;
|
|
} else if (auto *UO = dyn_cast<UnaryOperator>(Cond)) {
|
|
return FindPredicate(UO->getSubExpr());
|
|
} else if (auto *BO = dyn_cast<BinaryOperator>(Cond)) {
|
|
if ((Cond = FindPredicate(BO->getLHS())))
|
|
return Cond;
|
|
return FindPredicate(BO->getRHS());
|
|
}
|
|
return nullptr;
|
|
}
|
|
|
|
bool EnterPredicateGuardedContext(CallExpr *P);
|
|
void ExitPredicateGuardedContext(bool WasProcessorCheck);
|
|
bool TraverseGuardedStmt(Stmt *S, CallExpr *P);
|
|
|
|
public:
|
|
DiagnoseUnguardedBuiltins(Sema &SemaRef) : SemaRef(SemaRef) {
|
|
if (auto *TAT = SemaRef.getCurFunctionDecl(true)->getAttr<TargetAttr>()) {
|
|
// We use the somewhat misnamed x86 accessors because they provide exactly
|
|
// what we require.
|
|
TAT->getX86AddedFeatures(TargetFeatures);
|
|
if (auto GFXIP = TAT->getX86Architecture())
|
|
CurrentGFXIP.emplace_back(TAT->getLocation(), *GFXIP);
|
|
}
|
|
}
|
|
|
|
bool TraverseLambdaExpr(LambdaExpr *LE) override {
|
|
if (SemaRef.AMDGPU().HasPotentiallyUnguardedBuiltinUsage(
|
|
LE->getCallOperator()))
|
|
return true; // We have already handled this.
|
|
return DynamicRecursiveASTVisitor::TraverseLambdaExpr(LE);
|
|
}
|
|
|
|
bool TraverseStmt(Stmt *S) override {
|
|
if (!S)
|
|
return true;
|
|
return DynamicRecursiveASTVisitor::TraverseStmt(S);
|
|
}
|
|
|
|
void IssueDiagnostics(Stmt *S) { TraverseStmt(S); }
|
|
|
|
bool TraverseIfStmt(IfStmt *If) override {
|
|
if (auto *CE = dyn_cast_or_null<CallExpr>(FindPredicate(If->getCond())))
|
|
return TraverseGuardedStmt(If, CE);
|
|
return DynamicRecursiveASTVisitor::TraverseIfStmt(If);
|
|
}
|
|
|
|
bool TraverseCaseStmt(CaseStmt *CS) override {
|
|
return TraverseStmt(CS->getSubStmt());
|
|
}
|
|
|
|
bool TraverseConditionalOperator(ConditionalOperator *CO) override {
|
|
if (auto *CE = dyn_cast_or_null<CallExpr>(FindPredicate(CO->getCond())))
|
|
return TraverseGuardedStmt(CO, CE);
|
|
return DynamicRecursiveASTVisitor::TraverseConditionalOperator(CO);
|
|
}
|
|
|
|
bool VisitAsmStmt(AsmStmt *ASM) override;
|
|
bool VisitCallExpr(CallExpr *CE) override;
|
|
};
|
|
|
|
/// Record that traversal is entering a region guarded by the predicate call
/// \p P.
///
/// For a processor check the named processor is pushed onto CurrentGFXIP; if
/// a different processor is already on top of that stack, a conflict is
/// diagnosed first (the new entry is pushed regardless). For any other
/// predicate the ID of the builtin named by the first argument is pushed onto
/// GuardedBuiltins.
///
/// \returns true when \p P is a processor check; the value must be passed to
/// the matching ExitPredicateGuardedContext call so it pops the right stack.
bool DiagnoseUnguardedBuiltins::EnterPredicateGuardedContext(CallExpr *P) {
  bool IsProcessorCheck =
      P->getBuiltinCallee() == AMDGPU::BI__builtin_amdgcn_processor_is;

  if (IsProcessorCheck) {
    // The argument is accepted by ExpandAMDGPUPredicateBuiltIn after stripping
    // parentheses and casts, so strip them here as well; casting the raw
    // argument would assert on e.g. a parenthesised literal.
    StringRef G =
        cast<clang::StringLiteral>(P->getArg(0)->IgnoreParenCasts())
            ->getString();
    // TODO: handle generic ISAs.
    if (!CurrentGFXIP.empty() && G != CurrentGFXIP.back().second) {
      SemaRef.Diag(P->getExprLoc(),
                   diag::err_amdgcn_conflicting_is_processor_options)
          << P;
      SemaRef.Diag(CurrentGFXIP.back().first,
                   diag::note_amdgcn_previous_is_processor_guard);
    }
    CurrentGFXIP.emplace_back(P->getExprLoc(), G);
  } else {
    // Query the argument directly, as ExpandAMDGPUPredicateBuiltIn does,
    // instead of first requiring it to be a bare DeclRefExpr.
    auto *FD = cast<FunctionDecl>(P->getArg(0)->getReferencedDeclOfCallee());
    GuardedBuiltins.push_back(FD->getBuiltinID());
  }

  return IsProcessorCheck;
}
|
|
|
|
void DiagnoseUnguardedBuiltins::ExitPredicateGuardedContext(bool WasProcCheck) {
|
|
if (WasProcCheck)
|
|
CurrentGFXIP.pop_back();
|
|
else
|
|
GuardedBuiltins.pop_back();
|
|
}
|
|
|
|
/// Split an \c if statement or conditional operator into the pair
/// (branch treated as guarded, branch treated as unguarded).
///
/// By default that is (then / true branch, else / false branch). When the
/// condition itself is a logical negation the two are swapped. Either element
/// may be null (e.g. an \c if without an \c else).
///
/// \returns a pair of nulls when \p S is null or is neither an IfStmt nor a
/// ConditionalOperator.
inline std::pair<Stmt *, Stmt *> GetTraversalOrder(Stmt *S) {
  std::pair<Stmt *, Stmt *> Ordered{nullptr, nullptr};
  Expr *Condition = nullptr;

  if (auto *CO = dyn_cast_or_null<ConditionalOperator>(S)) {
    Condition = CO->getCond();
    Ordered = {CO->getTrueExpr(), CO->getFalseExpr()};
  } else if (auto *If = dyn_cast_or_null<IfStmt>(S)) {
    Condition = If->getCond();
    Ordered = {If->getThen(), If->getElse()};
  }

  // Condition is still null when S matched neither case above; plain
  // dyn_cast must not be handed a null pointer.
  if (auto *UO = dyn_cast_or_null<UnaryOperator>(Condition))
    if (UO->getOpcode() == UnaryOperatorKind::UO_LNot)
      std::swap(Ordered.first, Ordered.second);

  return Ordered;
}
|
|
|
|
/// Traverse the two branches of \p S, whose condition contains the predicate
/// call \p P.
///
/// The branch GetTraversalOrder reports first is traversed inside the guarded
/// context established for \p P; the other one is traversed after that
/// context has been left, and only if the first traversal asked to continue.
/// The condition itself is not traversed here.
bool DiagnoseUnguardedBuiltins::TraverseGuardedStmt(Stmt *S, CallExpr *P) {
  assert(S && "Unexpected missing Statement!");
  assert(P && "Unexpected missing Predicate!");

  const std::pair<Stmt *, Stmt *> Branches = GetTraversalOrder(S);

  const bool WasProcessorCheck = EnterPredicateGuardedContext(P);
  const bool GuardedOK = TraverseStmt(Branches.first);
  // Always leave the context, even when the traversal asked to stop.
  ExitPredicateGuardedContext(WasProcessorCheck);

  if (!GuardedOK)
    return false;
  return TraverseStmt(Branches.second);
}
|
|
|
|
/// Warn about an inline assembly statement that is reached while no processor
/// is recorded in CurrentGFXIP. Always returns true so traversal continues.
bool DiagnoseUnguardedBuiltins::VisitAsmStmt(AsmStmt *ASM) {
  // TODO: should we check if the ASM is valid for the target? Can we?
  if (CurrentGFXIP.empty()) {
    const std::string AsmText = ASM->generateAsmString(SemaRef.getASTContext());
    const SourceLocation AsmLoc = ASM->getAsmLoc();
    SemaRef.Diag(AsmLoc, diag::warn_amdgcn_unguarded_asm_stmt) << AsmText;
    SemaRef.Diag(AsmLoc, diag::note_amdgcn_unguarded_asm_silence) << AsmText;
  }

  return true;
}
|
|
|
|
/// Diagnose a call to a target specific builtin that is not covered by an
/// enclosing invocability guard for that same builtin.
///
/// Calls that are not builtins, not target specific, are one of the two
/// predicate builtins, or whose ID is on the GuardedBuiltins stack are
/// ignored. For the rest, the builtin's required features are evaluated
/// against a feature map built either from the innermost processor on
/// CurrentGFXIP or, when that stack is empty, from the function's target
/// features plus the features required by the currently guarded builtins.
///
/// Always returns true so traversal continues.
bool DiagnoseUnguardedBuiltins::VisitCallExpr(CallExpr *CE) {
  Builtin::Context &Builtins = SemaRef.getASTContext().BuiltinInfo;
  const unsigned ID = CE->getBuiltinCallee();

  if (!ID || !Builtins.isTSBuiltin(ID))
    return true;

  const bool IsPredicateBuiltin =
      ID == AMDGPU::BI__builtin_amdgcn_processor_is ||
      ID == AMDGPU::BI__builtin_amdgcn_is_invocable;
  if (IsPredicateBuiltin || llvm::is_contained(GuardedBuiltins, ID))
    return true;

  const bool InProcessorGuard = !CurrentGFXIP.empty();
  StringRef Required(Builtins.getRequiredFeatures(ID));

  llvm::StringMap<bool> Available;
  if (InProcessorGuard) {
    static const llvm::Triple AMDGCN("amdgcn-amd-amdhsa");
    llvm::AMDGPU::fillAMDGPUFeatureMap(CurrentGFXIP.back().second, AMDGCN,
                                       Available);
  } else {
    for (StringRef F : TargetFeatures)
      Available[F] = true;
    for (unsigned GID : GuardedBuiltins)
      for (StringRef F : llvm::split(Builtins.getRequiredFeatures(GID), ','))
        Available[F] = true;
  }

  FunctionDecl *Callee = CE->getDirectCallee();
  const SourceLocation CallLoc = CE->getExprLoc();

  // NOTE(review): the warning is issued when the required features evaluate
  // as satisfied by the map built above and the error when they do not -
  // confirm this polarity is the intended one.
  if (!Builtin::evaluateRequiredTargetFeatures(Required, Available)) {
    StringRef GFXIP = InProcessorGuard ? CurrentGFXIP.back().second : "";
    SemaRef.Diag(CallLoc, diag::err_amdgcn_incompatible_builtin)
        << Callee << Required << InProcessorGuard << GFXIP;
    if (InProcessorGuard)
      SemaRef.Diag(CurrentGFXIP.back().first,
                   diag::note_amdgcn_previous_is_processor_guard);
    return true;
  }

  SemaRef.Diag(CallLoc, diag::warn_amdgcn_unguarded_builtin) << Callee;
  SemaRef.Diag(CallLoc, diag::note_amdgcn_unguarded_builtin_silence) << Callee;
  return true;
}
|
|
} // Unnamed namespace
|
|
|
|
/// Run the unguarded-builtin-usage checker over the body of \p FD.
void SemaAMDGPU::DiagnoseUnguardedBuiltinUsage(FunctionDecl *FD) {
  DiagnoseUnguardedBuiltins Checker(SemaRef);
  Checker.IssueDiagnostics(FD->getBody());
}
|
|
} // namespace clang
|