[HIP] Support target id by --offload-arch
This patch introduces support of target id by -offload-arch. Differential Revision: https://reviews.llvm.org/D60620
This commit is contained in:
parent
cacfb02d28
commit
7546b29e76
@ -73,6 +73,11 @@ def warn_drv_unknown_cuda_version: Warning<
|
||||
InGroup<CudaUnknownVersion>;
|
||||
def err_drv_cuda_host_arch : Error<"unsupported architecture '%0' for host compilation.">;
|
||||
def err_drv_mix_cuda_hip : Error<"Mixed Cuda and HIP compilation is not supported.">;
|
||||
def err_drv_bad_target_id : Error<"Invalid target ID: %0 (A target ID is a processor name "
|
||||
"followed by an optional list of predefined features post-fixed by a plus or minus sign deliminated "
|
||||
"by colon, e.g. 'gfx908:sram-ecc+:xnack-')">;
|
||||
def err_drv_bad_offload_arch_combo : Error<"Invalid offload arch combinations: %0 and %1 (For a specific "
|
||||
"processor, a feature should either exist in all offload archs, or not exist in any offload archs)">;
|
||||
def err_drv_invalid_thread_model_for_target : Error<
|
||||
"invalid thread model '%0' in '%1' for this target">;
|
||||
def err_drv_invalid_linker_name : Error<
|
||||
|
||||
56
clang/include/clang/Basic/TargetID.h
Normal file
56
clang/include/clang/Basic/TargetID.h
Normal file
@ -0,0 +1,56 @@
|
||||
//===--- TargetID.h - Utilities for target ID -------------------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_CLANG_BASIC_TARGET_ID_H
|
||||
#define LLVM_CLANG_BASIC_TARGET_ID_H
|
||||
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/ADT/StringMap.h"
|
||||
#include "llvm/ADT/Triple.h"
|
||||
#include <set>
|
||||
|
||||
namespace clang {
|
||||
|
||||
/// Get all feature strings that can be used in target ID for \p Processor.
|
||||
/// Target ID is a processor name with optional feature strings
|
||||
/// postfixed by a plus or minus sign delimited by colons, e.g.
|
||||
/// gfx908:xnack+:sram-ecc-. Each processor have a limited
|
||||
/// number of predefined features when showing up in a target ID.
|
||||
const llvm::SmallVector<llvm::StringRef, 4>
|
||||
getAllPossibleTargetIDFeatures(const llvm::Triple &T,
|
||||
llvm::StringRef Processor);
|
||||
|
||||
/// Get processor name from target ID.
|
||||
/// Returns canonical processor name or empty if the processor name is invalid.
|
||||
llvm::StringRef getProcessorFromTargetID(const llvm::Triple &T,
|
||||
llvm::StringRef OffloadArch);
|
||||
|
||||
/// Parse a target ID to get processor and feature map.
|
||||
/// Returns canonicalized processor name or None if the target ID is invalid.
|
||||
/// Returns target ID features in \p FeatureMap if it is not null pointer.
|
||||
/// This function assumes \p OffloadArch is a valid target ID.
|
||||
/// If the target ID contains feature+, map it to true.
|
||||
/// If the target ID contains feature-, map it to false.
|
||||
/// If the target ID does not contain a feature (default), do not map it.
|
||||
llvm::Optional<llvm::StringRef>
|
||||
parseTargetID(const llvm::Triple &T, llvm::StringRef OffloadArch,
|
||||
llvm::StringMap<bool> *FeatureMap);
|
||||
|
||||
/// Returns canonical target ID, assuming \p Processor is canonical and all
|
||||
/// entries in \p Features are valid.
|
||||
std::string getCanonicalTargetID(llvm::StringRef Processor,
|
||||
const llvm::StringMap<bool> &Features);
|
||||
|
||||
/// Get the conflicted pair of target IDs for a compilation or a bundled code
|
||||
/// object, assuming \p TargetIDs are canonicalized. If there is no conflicts,
|
||||
/// returns None.
|
||||
llvm::Optional<std::pair<llvm::StringRef, llvm::StringRef>>
|
||||
getConflictTargetIDCombination(const std::set<llvm::StringRef> &TargetIDs);
|
||||
} // namespace clang
|
||||
|
||||
#endif
|
||||
@ -1061,6 +1061,9 @@ public:
|
||||
return Triple;
|
||||
}
|
||||
|
||||
/// Returns the target ID if supported.
|
||||
virtual llvm::Optional<std::string> getTargetID() const { return llvm::None; }
|
||||
|
||||
const llvm::DataLayout &getDataLayout() const {
|
||||
assert(DataLayout && "Uninitialized DataLayout!");
|
||||
return *DataLayout;
|
||||
|
||||
@ -297,6 +297,10 @@ public:
|
||||
/// Return whether an error during the parsing of the input args.
|
||||
bool containsError() const { return ContainsError; }
|
||||
|
||||
/// Force driver to fail before toolchain is created. This is necessary when
|
||||
/// error happens in action builder.
|
||||
void setContainsError() { ContainsError = true; }
|
||||
|
||||
/// Redirect - Redirect output of this compilation. Can only be done once.
|
||||
///
|
||||
/// \param Redirects - array of optional paths. The array should have a size
|
||||
|
||||
@ -609,7 +609,10 @@ def cuda_include_ptx_EQ : Joined<["--"], "cuda-include-ptx=">, Flags<[DriverOpti
|
||||
def no_cuda_include_ptx_EQ : Joined<["--"], "no-cuda-include-ptx=">, Flags<[DriverOption]>,
|
||||
HelpText<"Do not include PTX for the following GPU architecture (e.g. sm_35) or 'all'. May be specified more than once.">;
|
||||
def offload_arch_EQ : Joined<["--"], "offload-arch=">, Flags<[DriverOption]>,
|
||||
HelpText<"CUDA/HIP offloading device architecture (e.g. sm_35, gfx906). May be specified more than once.">;
|
||||
HelpText<"CUDA offloading device architecture (e.g. sm_35), or HIP offloading target ID in the form of a "
|
||||
"device architecture followed by target ID features delimited by a colon. Each target ID feature "
|
||||
"is a pre-defined string followed by a plus or minus sign (e.g. gfx908:xnack+:sram-ecc-). May be "
|
||||
"specified more than once.">;
|
||||
def cuda_gpu_arch_EQ : Joined<["--"], "cuda-gpu-arch=">, Flags<[DriverOption]>,
|
||||
Alias<offload_arch_EQ>;
|
||||
def hip_link : Flag<["--"], "hip-link">,
|
||||
|
||||
@ -62,6 +62,7 @@ add_clang_library(clangBasic
|
||||
SourceLocation.cpp
|
||||
SourceManager.cpp
|
||||
Stack.cpp
|
||||
TargetID.cpp
|
||||
TargetInfo.cpp
|
||||
Targets.cpp
|
||||
Targets/AArch64.cpp
|
||||
|
||||
169
clang/lib/Basic/TargetID.cpp
Normal file
169
clang/lib/Basic/TargetID.cpp
Normal file
@ -0,0 +1,169 @@
|
||||
//===--- TargetID.cpp - Utilities for parsing target ID -------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "clang/Basic/TargetID.h"
|
||||
#include "llvm/ADT/SmallSet.h"
|
||||
#include "llvm/ADT/Triple.h"
|
||||
#include "llvm/Support/TargetParser.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include <map>
|
||||
|
||||
namespace clang {
|
||||
|
||||
static const llvm::SmallVector<llvm::StringRef, 4>
|
||||
getAllPossibleAMDGPUTargetIDFeatures(const llvm::Triple &T,
|
||||
llvm::StringRef Proc) {
|
||||
// Entries in returned vector should be in alphabetical order.
|
||||
llvm::SmallVector<llvm::StringRef, 4> Ret;
|
||||
auto ProcKind = T.isAMDGCN() ? llvm::AMDGPU::parseArchAMDGCN(Proc)
|
||||
: llvm::AMDGPU::parseArchR600(Proc);
|
||||
if (ProcKind == llvm::AMDGPU::GK_NONE)
|
||||
return Ret;
|
||||
auto Features = T.isAMDGCN() ? llvm::AMDGPU::getArchAttrAMDGCN(ProcKind)
|
||||
: llvm::AMDGPU::getArchAttrR600(ProcKind);
|
||||
if (Features & llvm::AMDGPU::FEATURE_SRAM_ECC)
|
||||
Ret.push_back("sram-ecc");
|
||||
if (Features & llvm::AMDGPU::FEATURE_XNACK)
|
||||
Ret.push_back("xnack");
|
||||
return Ret;
|
||||
}
|
||||
|
||||
const llvm::SmallVector<llvm::StringRef, 4>
|
||||
getAllPossibleTargetIDFeatures(const llvm::Triple &T,
|
||||
llvm::StringRef Processor) {
|
||||
llvm::SmallVector<llvm::StringRef, 4> Ret;
|
||||
if (T.isAMDGPU())
|
||||
return getAllPossibleAMDGPUTargetIDFeatures(T, Processor);
|
||||
return Ret;
|
||||
}
|
||||
|
||||
/// Returns canonical processor name or empty string if \p Processor is invalid.
|
||||
static llvm::StringRef getCanonicalProcessorName(const llvm::Triple &T,
|
||||
llvm::StringRef Processor) {
|
||||
if (T.isAMDGPU())
|
||||
return llvm::AMDGPU::getCanonicalArchName(T, Processor);
|
||||
return Processor;
|
||||
}
|
||||
|
||||
llvm::StringRef getProcessorFromTargetID(const llvm::Triple &T,
|
||||
llvm::StringRef TargetID) {
|
||||
auto Split = TargetID.split(':');
|
||||
return getCanonicalProcessorName(T, Split.first);
|
||||
}
|
||||
|
||||
// Parse a target ID with format checking only. Do not check whether processor
|
||||
// name or features are valid for the processor.
|
||||
//
|
||||
// A target ID is a processor name followed by a list of target features
|
||||
// delimited by colon. Each target feature is a string post-fixed by a plus
|
||||
// or minus sign, e.g. gfx908:sram-ecc+:xnack-.
|
||||
static llvm::Optional<llvm::StringRef>
|
||||
parseTargetIDWithFormatCheckingOnly(llvm::StringRef TargetID,
|
||||
llvm::StringMap<bool> *FeatureMap) {
|
||||
llvm::StringRef Processor;
|
||||
|
||||
if (TargetID.empty())
|
||||
return llvm::StringRef();
|
||||
|
||||
auto Split = TargetID.split(':');
|
||||
Processor = Split.first;
|
||||
if (Processor.empty())
|
||||
return llvm::None;
|
||||
|
||||
auto Features = Split.second;
|
||||
if (Features.empty())
|
||||
return Processor;
|
||||
|
||||
llvm::StringMap<bool> LocalFeatureMap;
|
||||
if (!FeatureMap)
|
||||
FeatureMap = &LocalFeatureMap;
|
||||
|
||||
while (!Features.empty()) {
|
||||
auto Splits = Features.split(':');
|
||||
auto Sign = Splits.first.back();
|
||||
auto Feature = Splits.first.drop_back();
|
||||
if (Sign != '+' && Sign != '-')
|
||||
return llvm::None;
|
||||
bool IsOn = Sign == '+';
|
||||
auto Loc = FeatureMap->find(Feature);
|
||||
// Each feature can only show up at most once in target ID.
|
||||
if (Loc != FeatureMap->end())
|
||||
return llvm::None;
|
||||
(*FeatureMap)[Feature] = IsOn;
|
||||
Features = Splits.second;
|
||||
}
|
||||
return Processor;
|
||||
};
|
||||
|
||||
llvm::Optional<llvm::StringRef>
|
||||
parseTargetID(const llvm::Triple &T, llvm::StringRef TargetID,
|
||||
llvm::StringMap<bool> *FeatureMap) {
|
||||
auto OptionalProcessor =
|
||||
parseTargetIDWithFormatCheckingOnly(TargetID, FeatureMap);
|
||||
|
||||
if (!OptionalProcessor)
|
||||
return llvm::None;
|
||||
|
||||
llvm::StringRef Processor =
|
||||
getCanonicalProcessorName(T, OptionalProcessor.getValue());
|
||||
if (Processor.empty())
|
||||
return llvm::None;
|
||||
|
||||
llvm::SmallSet<llvm::StringRef, 4> AllFeatures;
|
||||
for (auto &&F : getAllPossibleTargetIDFeatures(T, Processor))
|
||||
AllFeatures.insert(F);
|
||||
|
||||
for (auto &&F : *FeatureMap)
|
||||
if (!AllFeatures.count(F.first()))
|
||||
return llvm::None;
|
||||
|
||||
return Processor;
|
||||
};
|
||||
|
||||
// A canonical target ID is a target ID containing a canonical processor name
|
||||
// and features in alphabetical order.
|
||||
std::string getCanonicalTargetID(llvm::StringRef Processor,
|
||||
const llvm::StringMap<bool> &Features) {
|
||||
std::string TargetID = Processor.str();
|
||||
std::map<const llvm::StringRef, bool> OrderedMap;
|
||||
for (const auto &F : Features)
|
||||
OrderedMap[F.first()] = F.second;
|
||||
for (auto F : OrderedMap)
|
||||
TargetID = TargetID + ':' + F.first.str() + (F.second ? "+" : "-");
|
||||
return TargetID;
|
||||
}
|
||||
|
||||
// For a specific processor, a feature either shows up in all target IDs, or
|
||||
// does not show up in any target IDs. Otherwise the target ID combination
|
||||
// is invalid.
|
||||
llvm::Optional<std::pair<llvm::StringRef, llvm::StringRef>>
|
||||
getConflictTargetIDCombination(const std::set<llvm::StringRef> &TargetIDs) {
|
||||
struct Info {
|
||||
llvm::StringRef TargetID;
|
||||
llvm::StringMap<bool> Features;
|
||||
};
|
||||
llvm::StringMap<Info> FeatureMap;
|
||||
for (auto &&ID : TargetIDs) {
|
||||
llvm::StringMap<bool> Features;
|
||||
llvm::StringRef Proc =
|
||||
parseTargetIDWithFormatCheckingOnly(ID, &Features).getValue();
|
||||
auto Loc = FeatureMap.find(Proc);
|
||||
if (Loc == FeatureMap.end())
|
||||
FeatureMap[Proc] = Info{ID, Features};
|
||||
else {
|
||||
auto &ExistingFeatures = Loc->second.Features;
|
||||
if (llvm::any_of(Features, [&](auto &F) {
|
||||
return ExistingFeatures.count(F.first()) == 0;
|
||||
}))
|
||||
return std::make_pair(Loc->second.TargetID, ID);
|
||||
}
|
||||
}
|
||||
return llvm::None;
|
||||
}
|
||||
|
||||
} // namespace clang
|
||||
@ -357,6 +357,23 @@ void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
|
||||
StringRef CanonName = isAMDGCN(getTriple()) ?
|
||||
getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
|
||||
Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
|
||||
if (isAMDGCN(getTriple())) {
|
||||
Builder.defineMacro("__amdgcn_processor__",
|
||||
Twine("\"") + Twine(CanonName) + Twine("\""));
|
||||
Builder.defineMacro("__amdgcn_target_id__",
|
||||
Twine("\"") + Twine(getTargetID().getValue()) +
|
||||
Twine("\""));
|
||||
for (auto F : getAllPossibleTargetIDFeatures(getTriple(), CanonName)) {
|
||||
auto Loc = OffloadArchFeatures.find(F);
|
||||
if (Loc != OffloadArchFeatures.end()) {
|
||||
std::string NewF = F.str();
|
||||
std::replace(NewF.begin(), NewF.end(), '-', '_');
|
||||
Builder.defineMacro(Twine("__amdgcn_feature_") + Twine(NewF) +
|
||||
Twine("__"),
|
||||
Loc->second ? "1" : "0");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
|
||||
|
||||
@ -13,6 +13,7 @@
|
||||
#ifndef LLVM_CLANG_LIB_BASIC_TARGETS_AMDGPU_H
|
||||
#define LLVM_CLANG_LIB_BASIC_TARGETS_AMDGPU_H
|
||||
|
||||
#include "clang/Basic/TargetID.h"
|
||||
#include "clang/Basic/TargetInfo.h"
|
||||
#include "clang/Basic/TargetOptions.h"
|
||||
#include "llvm/ADT/StringSet.h"
|
||||
@ -41,6 +42,14 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : public TargetInfo {
|
||||
llvm::AMDGPU::GPUKind GPUKind;
|
||||
unsigned GPUFeatures;
|
||||
|
||||
/// Target ID is device name followed by optional feature name postfixed
|
||||
/// by plus or minus sign delimitted by colon, e.g. gfx908:xnack+:sram-ecc-.
|
||||
/// If the target ID contains feature+, map it to true.
|
||||
/// If the target ID contains feature-, map it to false.
|
||||
/// If the target ID does not contain a feature (default), do not map it.
|
||||
llvm::StringMap<bool> OffloadArchFeatures;
|
||||
std::string TargetID;
|
||||
|
||||
bool hasFP64() const {
|
||||
return getTriple().getArch() == llvm::Triple::amdgcn ||
|
||||
!!(GPUFeatures & llvm::AMDGPU::FEATURE_FP64);
|
||||
@ -389,6 +398,35 @@ public:
|
||||
void setAuxTarget(const TargetInfo *Aux) override;
|
||||
|
||||
bool hasExtIntType() const override { return true; }
|
||||
|
||||
// Record offload arch features since they are needed for defining the
|
||||
// pre-defined macros.
|
||||
bool handleTargetFeatures(std::vector<std::string> &Features,
|
||||
DiagnosticsEngine &Diags) override {
|
||||
auto TargetIDFeatures =
|
||||
getAllPossibleTargetIDFeatures(getTriple(), getArchNameAMDGCN(GPUKind));
|
||||
llvm::for_each(Features, [&](const auto &F) {
|
||||
assert(F.front() == '+' || F.front() == '-');
|
||||
bool IsOn = F.front() == '+';
|
||||
StringRef Name = StringRef(F).drop_front();
|
||||
if (llvm::find(TargetIDFeatures, Name) == TargetIDFeatures.end())
|
||||
return;
|
||||
assert(OffloadArchFeatures.find(Name) == OffloadArchFeatures.end());
|
||||
OffloadArchFeatures[Name] = IsOn;
|
||||
});
|
||||
return true;
|
||||
}
|
||||
|
||||
Optional<std::string> getTargetID() const override {
|
||||
if (!isAMDGCN(getTriple()))
|
||||
return llvm::None;
|
||||
// When -target-cpu is not set, we assume generic code that it is valid
|
||||
// for all GPU and use an empty string as target ID to represent that.
|
||||
if (GPUKind == llvm::AMDGPU::GK_NONE)
|
||||
return std::string("");
|
||||
return getCanonicalTargetID(getArchNameAMDGCN(GPUKind),
|
||||
OffloadArchFeatures);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace targets
|
||||
|
||||
@ -46,6 +46,7 @@
|
||||
#include "ToolChains/VEToolchain.h"
|
||||
#include "ToolChains/WebAssembly.h"
|
||||
#include "ToolChains/XCore.h"
|
||||
#include "clang/Basic/TargetID.h"
|
||||
#include "clang/Basic/Version.h"
|
||||
#include "clang/Config/config.h"
|
||||
#include "clang/Driver/Action.h"
|
||||
@ -93,6 +94,11 @@ using namespace clang::driver;
|
||||
using namespace clang;
|
||||
using namespace llvm::opt;
|
||||
|
||||
static llvm::Triple getHIPOffloadTargetTriple() {
|
||||
static const llvm::Triple T("amdgcn-amd-amdhsa");
|
||||
return T;
|
||||
}
|
||||
|
||||
// static
|
||||
std::string Driver::GetResourcesPath(StringRef BinaryPath,
|
||||
StringRef CustomResourceDir) {
|
||||
@ -672,10 +678,8 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
|
||||
} else if (IsHIP) {
|
||||
const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>();
|
||||
const llvm::Triple &HostTriple = HostTC->getTriple();
|
||||
StringRef DeviceTripleStr;
|
||||
auto OFK = Action::OFK_HIP;
|
||||
DeviceTripleStr = "amdgcn-amd-amdhsa";
|
||||
llvm::Triple HIPTriple(DeviceTripleStr);
|
||||
llvm::Triple HIPTriple = getHIPOffloadTargetTriple();
|
||||
// Use the HIP and host triples as the key into the ToolChains map,
|
||||
// because the device toolchain we create depends on both.
|
||||
auto &HIPTC = ToolChains[HIPTriple.str() + "/" + HostTriple.str()];
|
||||
@ -2390,8 +2394,20 @@ class OffloadingActionBuilder final {
|
||||
bool EmitLLVM = false;
|
||||
bool EmitAsm = false;
|
||||
|
||||
/// ID to identify each device compilation. For CUDA it is simply the
|
||||
/// GPU arch string. For HIP it is either the GPU arch string or GPU
|
||||
/// arch string plus feature strings delimited by a plus sign, e.g.
|
||||
/// gfx906+xnack.
|
||||
struct TargetID {
|
||||
/// Target ID string which is persistent throughout the compilation.
|
||||
const char *ID;
|
||||
TargetID(CudaArch Arch) { ID = CudaArchToString(Arch); }
|
||||
TargetID(const char *ID) : ID(ID) {}
|
||||
operator const char *() { return ID; }
|
||||
operator StringRef() { return StringRef(ID); }
|
||||
};
|
||||
/// List of GPU architectures to use in this compilation.
|
||||
SmallVector<CudaArch, 4> GpuArchList;
|
||||
SmallVector<TargetID, 4> GpuArchList;
|
||||
|
||||
/// The CUDA actions for the current input.
|
||||
ActionList CudaDeviceActions;
|
||||
@ -2474,7 +2490,7 @@ class OffloadingActionBuilder final {
|
||||
|
||||
for (auto Arch : GpuArchList) {
|
||||
CudaDeviceActions.push_back(UA);
|
||||
UA->registerDependentActionInfo(ToolChains[0], CudaArchToString(Arch),
|
||||
UA->registerDependentActionInfo(ToolChains[0], Arch,
|
||||
AssociatedOffloadKind);
|
||||
}
|
||||
return ABRT_Success;
|
||||
@ -2485,10 +2501,9 @@ class OffloadingActionBuilder final {
|
||||
|
||||
void appendTopLevelActions(ActionList &AL) override {
|
||||
// Utility to append actions to the top level list.
|
||||
auto AddTopLevel = [&](Action *A, CudaArch BoundArch) {
|
||||
auto AddTopLevel = [&](Action *A, TargetID TargetID) {
|
||||
OffloadAction::DeviceDependences Dep;
|
||||
Dep.add(*A, *ToolChains.front(), CudaArchToString(BoundArch),
|
||||
AssociatedOffloadKind);
|
||||
Dep.add(*A, *ToolChains.front(), TargetID, AssociatedOffloadKind);
|
||||
AL.push_back(C.MakeAction<OffloadAction>(Dep, A->getType()));
|
||||
};
|
||||
|
||||
@ -2516,6 +2531,13 @@ class OffloadingActionBuilder final {
|
||||
CudaDeviceActions.clear();
|
||||
}
|
||||
|
||||
/// Get canonicalized offload arch option. \returns empty StringRef if the
|
||||
/// option is invalid.
|
||||
virtual StringRef getCanonicalOffloadArch(StringRef Arch) = 0;
|
||||
|
||||
virtual llvm::Optional<std::pair<llvm::StringRef, llvm::StringRef>>
|
||||
getConflictOffloadArchCombination(const std::set<StringRef> &GpuArchs) = 0;
|
||||
|
||||
bool initialize() override {
|
||||
assert(AssociatedOffloadKind == Action::OFK_Cuda ||
|
||||
AssociatedOffloadKind == Action::OFK_HIP);
|
||||
@ -2563,7 +2585,7 @@ class OffloadingActionBuilder final {
|
||||
EmitAsm = Args.getLastArg(options::OPT_S);
|
||||
|
||||
// Collect all cuda_gpu_arch parameters, removing duplicates.
|
||||
std::set<CudaArch> GpuArchs;
|
||||
std::set<StringRef> GpuArchs;
|
||||
bool Error = false;
|
||||
for (Arg *A : Args) {
|
||||
if (!(A->getOption().matches(options::OPT_offload_arch_EQ) ||
|
||||
@ -2571,27 +2593,35 @@ class OffloadingActionBuilder final {
|
||||
continue;
|
||||
A->claim();
|
||||
|
||||
const StringRef ArchStr = A->getValue();
|
||||
StringRef ArchStr = A->getValue();
|
||||
if (A->getOption().matches(options::OPT_no_offload_arch_EQ) &&
|
||||
ArchStr == "all") {
|
||||
GpuArchs.clear();
|
||||
continue;
|
||||
}
|
||||
CudaArch Arch = StringToCudaArch(ArchStr);
|
||||
if (Arch == CudaArch::UNKNOWN) {
|
||||
C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << ArchStr;
|
||||
ArchStr = getCanonicalOffloadArch(ArchStr);
|
||||
if (ArchStr.empty()) {
|
||||
Error = true;
|
||||
} else if (A->getOption().matches(options::OPT_offload_arch_EQ))
|
||||
GpuArchs.insert(Arch);
|
||||
GpuArchs.insert(ArchStr);
|
||||
else if (A->getOption().matches(options::OPT_no_offload_arch_EQ))
|
||||
GpuArchs.erase(Arch);
|
||||
GpuArchs.erase(ArchStr);
|
||||
else
|
||||
llvm_unreachable("Unexpected option.");
|
||||
}
|
||||
|
||||
auto &&ConflictingArchs = getConflictOffloadArchCombination(GpuArchs);
|
||||
if (ConflictingArchs) {
|
||||
C.getDriver().Diag(clang::diag::err_drv_bad_offload_arch_combo)
|
||||
<< ConflictingArchs.getValue().first
|
||||
<< ConflictingArchs.getValue().second;
|
||||
C.setContainsError();
|
||||
return true;
|
||||
}
|
||||
|
||||
// Collect list of GPUs remaining in the set.
|
||||
for (CudaArch Arch : GpuArchs)
|
||||
GpuArchList.push_back(Arch);
|
||||
for (auto Arch : GpuArchs)
|
||||
GpuArchList.push_back(Arch.data());
|
||||
|
||||
// Default to sm_20 which is the lowest common denominator for
|
||||
// supported GPUs. sm_20 code should work correctly, if
|
||||
@ -2613,6 +2643,21 @@ class OffloadingActionBuilder final {
|
||||
DefaultCudaArch = CudaArch::SM_20;
|
||||
}
|
||||
|
||||
StringRef getCanonicalOffloadArch(StringRef ArchStr) override {
|
||||
CudaArch Arch = StringToCudaArch(ArchStr);
|
||||
if (Arch == CudaArch::UNKNOWN) {
|
||||
C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << ArchStr;
|
||||
return StringRef();
|
||||
}
|
||||
return CudaArchToString(Arch);
|
||||
}
|
||||
|
||||
llvm::Optional<std::pair<llvm::StringRef, llvm::StringRef>>
|
||||
getConflictOffloadArchCombination(
|
||||
const std::set<StringRef> &GpuArchs) override {
|
||||
return llvm::None;
|
||||
}
|
||||
|
||||
ActionBuilderReturnCode
|
||||
getDeviceDependences(OffloadAction::DeviceDependences &DA,
|
||||
phases::ID CurPhase, phases::ID FinalPhase,
|
||||
@ -2672,8 +2717,7 @@ class OffloadingActionBuilder final {
|
||||
|
||||
for (auto &A : {AssembleAction, BackendAction}) {
|
||||
OffloadAction::DeviceDependences DDep;
|
||||
DDep.add(*A, *ToolChains.front(), CudaArchToString(GpuArchList[I]),
|
||||
Action::OFK_Cuda);
|
||||
DDep.add(*A, *ToolChains.front(), GpuArchList[I], Action::OFK_Cuda);
|
||||
DeviceActions.push_back(
|
||||
C.MakeAction<OffloadAction>(DDep, A->getType()));
|
||||
}
|
||||
@ -2732,6 +2776,24 @@ class OffloadingActionBuilder final {
|
||||
|
||||
bool canUseBundlerUnbundler() const override { return true; }
|
||||
|
||||
StringRef getCanonicalOffloadArch(StringRef IdStr) override {
|
||||
llvm::StringMap<bool> Features;
|
||||
auto ArchStr =
|
||||
parseTargetID(getHIPOffloadTargetTriple(), IdStr, &Features);
|
||||
if (!ArchStr) {
|
||||
C.getDriver().Diag(clang::diag::err_drv_bad_target_id) << IdStr;
|
||||
return StringRef();
|
||||
}
|
||||
auto CanId = getCanonicalTargetID(ArchStr.getValue(), Features);
|
||||
return Args.MakeArgStringRef(CanId);
|
||||
};
|
||||
|
||||
llvm::Optional<std::pair<llvm::StringRef, llvm::StringRef>>
|
||||
getConflictOffloadArchCombination(
|
||||
const std::set<StringRef> &GpuArchs) override {
|
||||
return getConflictTargetIDCombination(GpuArchs);
|
||||
}
|
||||
|
||||
ActionBuilderReturnCode
|
||||
getDeviceDependences(OffloadAction::DeviceDependences &DA,
|
||||
phases::ID CurPhase, phases::ID FinalPhase,
|
||||
@ -2776,8 +2838,8 @@ class OffloadingActionBuilder final {
|
||||
// device arch of the next action being propagated to the above link
|
||||
// action.
|
||||
OffloadAction::DeviceDependences DDep;
|
||||
DDep.add(*CudaDeviceActions[I], *ToolChains.front(),
|
||||
CudaArchToString(GpuArchList[I]), AssociatedOffloadKind);
|
||||
DDep.add(*CudaDeviceActions[I], *ToolChains.front(), GpuArchList[I],
|
||||
AssociatedOffloadKind);
|
||||
CudaDeviceActions[I] = C.MakeAction<OffloadAction>(
|
||||
DDep, CudaDeviceActions[I]->getType());
|
||||
}
|
||||
@ -2844,7 +2906,7 @@ class OffloadingActionBuilder final {
|
||||
// LI contains all the inputs for the linker.
|
||||
OffloadAction::DeviceDependences DeviceLinkDeps;
|
||||
DeviceLinkDeps.add(*DeviceLinkAction, *ToolChains[0],
|
||||
CudaArchToString(GpuArchList[I]), AssociatedOffloadKind);
|
||||
GpuArchList[I], AssociatedOffloadKind);
|
||||
AL.push_back(C.MakeAction<OffloadAction>(DeviceLinkDeps,
|
||||
DeviceLinkAction->getType()));
|
||||
++I;
|
||||
|
||||
@ -9,6 +9,7 @@
|
||||
#include "AMDGPU.h"
|
||||
#include "CommonArgs.h"
|
||||
#include "InputInfo.h"
|
||||
#include "clang/Basic/TargetID.h"
|
||||
#include "clang/Driver/Compilation.h"
|
||||
#include "clang/Driver/DriverDiagnostic.h"
|
||||
#include "llvm/Option/ArgList.h"
|
||||
@ -360,11 +361,34 @@ void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA,
|
||||
}
|
||||
|
||||
void amdgpu::getAMDGPUTargetFeatures(const Driver &D,
|
||||
const llvm::Triple &Triple,
|
||||
const llvm::opt::ArgList &Args,
|
||||
std::vector<StringRef> &Features) {
|
||||
if (const Arg *dAbi = Args.getLastArg(options::OPT_mamdgpu_debugger_abi))
|
||||
D.Diag(diag::err_drv_clang_unsupported) << dAbi->getAsString(Args);
|
||||
|
||||
// Add target ID features to -target-feature options. No diagnostics should
|
||||
// be emitted here since invalid target ID is diagnosed at other places.
|
||||
StringRef TargetID = Args.getLastArgValue(options::OPT_mcpu_EQ);
|
||||
if (!TargetID.empty()) {
|
||||
llvm::StringMap<bool> FeatureMap;
|
||||
auto OptionalGpuArch = parseTargetID(Triple, TargetID, &FeatureMap);
|
||||
if (OptionalGpuArch) {
|
||||
StringRef GpuArch = OptionalGpuArch.getValue();
|
||||
// Iterate through all possible target ID features for the given GPU.
|
||||
// If it is mapped to true, add +feature.
|
||||
// If it is mapped to false, add -feature.
|
||||
// If it is not in the map (default), do not add it
|
||||
for (auto &&Feature : getAllPossibleTargetIDFeatures(Triple, GpuArch)) {
|
||||
auto Pos = FeatureMap.find(Feature);
|
||||
if (Pos == FeatureMap.end())
|
||||
continue;
|
||||
Features.push_back(Args.MakeArgStringRef(
|
||||
(Twine(Pos->second ? "+" : "-") + Feature).str()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (Args.getLastArg(options::OPT_mwavefrontsize64)) {
|
||||
Features.push_back("-wavefrontsize16");
|
||||
Features.push_back("-wavefrontsize32");
|
||||
@ -398,16 +422,15 @@ AMDGPUToolChain::TranslateArgs(const DerivedArgList &Args, StringRef BoundArch,
|
||||
DerivedArgList *DAL =
|
||||
Generic_ELF::TranslateArgs(Args, BoundArch, DeviceOffloadKind);
|
||||
|
||||
// Do nothing if not OpenCL (-x cl)
|
||||
if (!Args.getLastArgValue(options::OPT_x).equals("cl"))
|
||||
return DAL;
|
||||
const OptTable &Opts = getDriver().getOpts();
|
||||
|
||||
if (!DAL)
|
||||
DAL = new DerivedArgList(Args.getBaseArgs());
|
||||
for (auto *A : Args)
|
||||
DAL->append(A);
|
||||
|
||||
const OptTable &Opts = getDriver().getOpts();
|
||||
if (!Args.getLastArgValue(options::OPT_x).equals("cl"))
|
||||
return DAL;
|
||||
|
||||
// Phase 1 (.cl -> .bc)
|
||||
if (Args.hasArg(options::OPT_c) && Args.hasArg(options::OPT_emit_llvm)) {
|
||||
@ -452,7 +475,8 @@ llvm::DenormalMode AMDGPUToolChain::getDefaultDenormalModeForType(
|
||||
|
||||
if (JA.getOffloadingDeviceKind() == Action::OFK_HIP ||
|
||||
JA.getOffloadingDeviceKind() == Action::OFK_Cuda) {
|
||||
auto Kind = llvm::AMDGPU::parseArchAMDGCN(JA.getOffloadingArch());
|
||||
auto Arch = getProcessorFromTargetID(getTriple(), JA.getOffloadingArch());
|
||||
auto Kind = llvm::AMDGPU::parseArchAMDGCN(Arch);
|
||||
if (FPType && FPType == &llvm::APFloat::IEEEsingle() &&
|
||||
DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
|
||||
options::OPT_fno_cuda_flush_denormals_to_zero,
|
||||
@ -462,7 +486,7 @@ llvm::DenormalMode AMDGPUToolChain::getDefaultDenormalModeForType(
|
||||
return llvm::DenormalMode::getIEEE();
|
||||
}
|
||||
|
||||
const StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_mcpu_EQ);
|
||||
const StringRef GpuArch = getGPUArch(DriverArgs);
|
||||
auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch);
|
||||
|
||||
// TODO: There are way too many flags that change this. Do we need to check
|
||||
@ -497,6 +521,8 @@ void AMDGPUToolChain::addClangTargetOptions(
|
||||
const llvm::opt::ArgList &DriverArgs,
|
||||
llvm::opt::ArgStringList &CC1Args,
|
||||
Action::OffloadKind DeviceOffloadingKind) const {
|
||||
// Allow using target ID in -mcpu.
|
||||
translateTargetID(DriverArgs, CC1Args);
|
||||
// Default to "hidden" visibility, as object level linking will not be
|
||||
// supported for the foreseeable future.
|
||||
if (!DriverArgs.hasArg(options::OPT_fvisibility_EQ,
|
||||
@ -507,6 +533,29 @@ void AMDGPUToolChain::addClangTargetOptions(
|
||||
}
|
||||
}
|
||||
|
||||
StringRef
|
||||
AMDGPUToolChain::getGPUArch(const llvm::opt::ArgList &DriverArgs) const {
|
||||
return getProcessorFromTargetID(
|
||||
getTriple(), DriverArgs.getLastArgValue(options::OPT_mcpu_EQ));
|
||||
}
|
||||
|
||||
StringRef
|
||||
AMDGPUToolChain::translateTargetID(const llvm::opt::ArgList &DriverArgs,
|
||||
llvm::opt::ArgStringList &CC1Args) const {
|
||||
StringRef TargetID = DriverArgs.getLastArgValue(options::OPT_mcpu_EQ);
|
||||
if (TargetID.empty())
|
||||
return StringRef();
|
||||
|
||||
llvm::StringMap<bool> FeatureMap;
|
||||
auto OptionalGpuArch = parseTargetID(getTriple(), TargetID, &FeatureMap);
|
||||
if (!OptionalGpuArch) {
|
||||
getDriver().Diag(clang::diag::err_drv_bad_target_id) << TargetID;
|
||||
return StringRef();
|
||||
}
|
||||
|
||||
return OptionalGpuArch.getValue();
|
||||
}
|
||||
|
||||
void ROCMToolChain::addClangTargetOptions(
|
||||
const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
|
||||
Action::OffloadKind DeviceOffloadingKind) const {
|
||||
@ -528,7 +577,7 @@ void ROCMToolChain::addClangTargetOptions(
|
||||
}
|
||||
|
||||
// Get the device name and canonicalize it
|
||||
const StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_mcpu_EQ);
|
||||
const StringRef GpuArch = getGPUArch(DriverArgs);
|
||||
auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch);
|
||||
const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind);
|
||||
std::string LibDeviceFile = RocmInstallation.getLibDeviceFile(CanonArch);
|
||||
|
||||
@ -11,6 +11,7 @@
|
||||
|
||||
#include "Gnu.h"
|
||||
#include "ROCm.h"
|
||||
#include "clang/Basic/TargetID.h"
|
||||
#include "clang/Driver/Options.h"
|
||||
#include "clang/Driver/Tool.h"
|
||||
#include "clang/Driver/ToolChain.h"
|
||||
@ -36,7 +37,8 @@ public:
|
||||
const char *LinkingOutput) const override;
|
||||
};
|
||||
|
||||
void getAMDGPUTargetFeatures(const Driver &D, const llvm::opt::ArgList &Args,
|
||||
void getAMDGPUTargetFeatures(const Driver &D, const llvm::Triple &Triple,
|
||||
const llvm::opt::ArgList &Args,
|
||||
std::vector<StringRef> &Features);
|
||||
|
||||
} // end namespace amdgpu
|
||||
@ -87,6 +89,14 @@ public:
|
||||
|
||||
/// Needed for translating LTO options.
|
||||
const char *getDefaultLinker() const override { return "ld.lld"; }
|
||||
|
||||
protected:
|
||||
/// Translate -mcpu option containing target ID to cc1 options.
|
||||
/// Returns the GPU name.
|
||||
StringRef translateTargetID(const llvm::opt::ArgList &DriverArgs,
|
||||
llvm::opt::ArgStringList &CC1Args) const;
|
||||
|
||||
StringRef getGPUArch(const llvm::opt::ArgList &DriverArgs) const;
|
||||
};
|
||||
|
||||
class LLVM_LIBRARY_VISIBILITY ROCMToolChain : public AMDGPUToolChain {
|
||||
|
||||
@ -366,7 +366,7 @@ static void getTargetFeatures(const Driver &D, const llvm::Triple &Triple,
|
||||
break;
|
||||
case llvm::Triple::r600:
|
||||
case llvm::Triple::amdgcn:
|
||||
amdgpu::getAMDGPUTargetFeatures(D, Args, Features);
|
||||
amdgpu::getAMDGPUTargetFeatures(D, Triple, Args, Features);
|
||||
break;
|
||||
case llvm::Triple::msp430:
|
||||
msp430::getMSP430TargetFeatures(D, Args, Features);
|
||||
|
||||
@ -226,11 +226,12 @@ void tools::AddTargetFeature(const ArgList &Args,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the (LLVM) name of the R600 gpu we are targeting.
|
||||
static std::string getR600TargetGPU(const ArgList &Args) {
|
||||
/// Get the (LLVM) name of the AMDGPU gpu we are targeting.
|
||||
static std::string getAMDGPUTargetGPU(const llvm::Triple &T,
|
||||
const ArgList &Args) {
|
||||
if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) {
|
||||
const char *GPUName = A->getValue();
|
||||
return llvm::StringSwitch<const char *>(GPUName)
|
||||
auto GPUName = getProcessorFromTargetID(T, A->getValue());
|
||||
return llvm::StringSwitch<std::string>(GPUName)
|
||||
.Cases("rv630", "rv635", "r600")
|
||||
.Cases("rv610", "rv620", "rs780", "rs880")
|
||||
.Case("rv740", "rv770")
|
||||
@ -238,7 +239,7 @@ static std::string getR600TargetGPU(const ArgList &Args) {
|
||||
.Cases("sumo", "sumo2", "sumo")
|
||||
.Case("hemlock", "cypress")
|
||||
.Case("aruba", "cayman")
|
||||
.Default(GPUName);
|
||||
.Default(GPUName.str());
|
||||
}
|
||||
return "";
|
||||
}
|
||||
@ -364,7 +365,7 @@ std::string tools::getCPUName(const ArgList &Args, const llvm::Triple &T,
|
||||
|
||||
case llvm::Triple::r600:
|
||||
case llvm::Triple::amdgcn:
|
||||
return getR600TargetGPU(Args);
|
||||
return getAMDGPUTargetGPU(T, Args);
|
||||
|
||||
case llvm::Triple::wasm32:
|
||||
case llvm::Triple::wasm64:
|
||||
|
||||
@ -11,6 +11,7 @@
|
||||
#include "CommonArgs.h"
|
||||
#include "InputInfo.h"
|
||||
#include "clang/Basic/Cuda.h"
|
||||
#include "clang/Basic/TargetID.h"
|
||||
#include "clang/Driver/Compilation.h"
|
||||
#include "clang/Driver/Driver.h"
|
||||
#include "clang/Driver/DriverDiagnostic.h"
|
||||
@ -68,7 +69,7 @@ void AMDGCN::Linker::constructLldCommand(Compilation &C, const JobAction &JA,
|
||||
|
||||
// Extract all the -m options
|
||||
std::vector<llvm::StringRef> Features;
|
||||
amdgpu::getAMDGPUTargetFeatures(D, Args, Features);
|
||||
amdgpu::getAMDGPUTargetFeatures(D, TC.getTriple(), Args, Features);
|
||||
|
||||
// Add features to mattr such as cumode
|
||||
std::string MAttrString = "-plugin-opt=-mattr=";
|
||||
@ -232,7 +233,8 @@ void HIPToolChain::addClangTargetOptions(
|
||||
Action::OffloadKind DeviceOffloadingKind) const {
|
||||
HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);
|
||||
|
||||
StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_mcpu_EQ);
|
||||
// Allow using target ID in --offload-arch.
|
||||
StringRef GpuArch = translateTargetID(DriverArgs, CC1Args);
|
||||
assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
|
||||
(void) GpuArch;
|
||||
assert(DeviceOffloadingKind == Action::OFK_HIP &&
|
||||
|
||||
@ -12,19 +12,19 @@
|
||||
// RUN: %clang -### -target amdgcn -mcpu=gfx700 -mno-code-object-v3 %s 2>&1 | FileCheck --check-prefix=NO-CODE-OBJECT-V3 %s
|
||||
// NO-CODE-OBJECT-V3: "-target-feature" "-code-object-v3"
|
||||
|
||||
// RUN: %clang -### -target amdgcn -mcpu=gfx700 -mxnack %s 2>&1 | FileCheck --check-prefix=XNACK %s
|
||||
// RUN: %clang -### -target amdgcn-amdhsa -mcpu=gfx900:xnack+ %s 2>&1 | FileCheck --check-prefix=XNACK %s
|
||||
// XNACK: "-target-feature" "+xnack"
|
||||
|
||||
// RUN: %clang -### -target amdgcn -mcpu=gfx700 -mno-xnack %s 2>&1 | FileCheck --check-prefix=NO-XNACK %s
|
||||
// RUN: %clang -### -target amdgcn-amdpal -mcpu=gfx900:xnack- %s 2>&1 | FileCheck --check-prefix=NO-XNACK %s
|
||||
// NO-XNACK: "-target-feature" "-xnack"
|
||||
|
||||
// RUN: %clang -### -target amdgcn -mcpu=gfx700 -msram-ecc %s 2>&1 | FileCheck --check-prefix=SRAM-ECC %s
|
||||
// RUN: %clang -### -target amdgcn-mesa3d -mcpu=gfx908:sram-ecc+ %s 2>&1 | FileCheck --check-prefix=SRAM-ECC %s
|
||||
// SRAM-ECC: "-target-feature" "+sram-ecc"
|
||||
|
||||
// RUN: %clang -### -target amdgcn -mcpu=gfx700 -mno-sram-ecc %s 2>&1 | FileCheck --check-prefix=NO-SRAM-ECC %s
|
||||
// RUN: %clang -### -target amdgcn-amdhsa -mcpu=gfx908:sram-ecc- %s 2>&1 | FileCheck --check-prefix=NO-SRAM-ECC %s
|
||||
// NO-SRAM-ECC: "-target-feature" "-sram-ecc"
|
||||
|
||||
// RUN: %clang -### -target amdgcn -mcpu=gfx1010 -mwavefrontsize64 %s 2>&1 | FileCheck --check-prefix=WAVE64 %s
|
||||
// RUN: %clang -### -target amdgcn-amdpal -mcpu=gfx1010 -mwavefrontsize64 %s 2>&1 | FileCheck --check-prefix=WAVE64 %s
|
||||
// WAVE64: "-target-feature" "-wavefrontsize16" "-target-feature" "-wavefrontsize32" "-target-feature" "+wavefrontsize64"
|
||||
|
||||
// RUN: %clang -### -target amdgcn -mcpu=gfx1010 -mno-wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=NO-WAVE64 %s
|
||||
|
||||
@ -324,3 +324,26 @@
|
||||
// GFX1012-DAG: #define __gfx1012__ 1
|
||||
// GFX1030-DAG: #define __gfx1030__ 1
|
||||
// GFX1031-DAG: #define __gfx1031__ 1
|
||||
|
||||
// GFX600-DAG: #define __amdgcn_processor__ "gfx600"
|
||||
// GFX601-DAG: #define __amdgcn_processor__ "gfx601"
|
||||
// GFX700-DAG: #define __amdgcn_processor__ "gfx700"
|
||||
// GFX701-DAG: #define __amdgcn_processor__ "gfx701"
|
||||
// GFX702-DAG: #define __amdgcn_processor__ "gfx702"
|
||||
// GFX703-DAG: #define __amdgcn_processor__ "gfx703"
|
||||
// GFX704-DAG: #define __amdgcn_processor__ "gfx704"
|
||||
// GFX801-DAG: #define __amdgcn_processor__ "gfx801"
|
||||
// GFX802-DAG: #define __amdgcn_processor__ "gfx802"
|
||||
// GFX803-DAG: #define __amdgcn_processor__ "gfx803"
|
||||
// GFX810-DAG: #define __amdgcn_processor__ "gfx810"
|
||||
// GFX900-DAG: #define __amdgcn_processor__ "gfx900"
|
||||
// GFX902-DAG: #define __amdgcn_processor__ "gfx902"
|
||||
// GFX904-DAG: #define __amdgcn_processor__ "gfx904"
|
||||
// GFX906-DAG: #define __amdgcn_processor__ "gfx906"
|
||||
// GFX908-DAG: #define __amdgcn_processor__ "gfx908"
|
||||
// GFX909-DAG: #define __amdgcn_processor__ "gfx909"
|
||||
// GFX1010-DAG: #define __amdgcn_processor__ "gfx1010"
|
||||
// GFX1011-DAG: #define __amdgcn_processor__ "gfx1011"
|
||||
// GFX1012-DAG: #define __amdgcn_processor__ "gfx1012"
|
||||
// GFX1030-DAG: #define __amdgcn_processor__ "gfx1030"
|
||||
// GFX1031-DAG: #define __amdgcn_processor__ "gfx1031"
|
||||
@ -54,33 +54,33 @@
|
||||
|
||||
// RUN: %clang -### -target amdgcn %s 2>&1 | FileCheck --check-prefix=GCNDEFAULT %s
|
||||
// RUN: %clang -### -target amdgcn -mcpu=gfx600 %s 2>&1 | FileCheck --check-prefix=GFX600 %s
|
||||
// RUN: %clang -### -target amdgcn -mcpu=tahiti %s 2>&1 | FileCheck --check-prefix=TAHITI %s
|
||||
// RUN: %clang -### -target amdgcn -mcpu=tahiti %s 2>&1 | FileCheck --check-prefix=GFX600 %s
|
||||
// RUN: %clang -### -target amdgcn -mcpu=gfx601 %s 2>&1 | FileCheck --check-prefix=GFX601 %s
|
||||
// RUN: %clang -### -target amdgcn -mcpu=hainan %s 2>&1 | FileCheck --check-prefix=HAINAN %s
|
||||
// RUN: %clang -### -target amdgcn -mcpu=oland %s 2>&1 | FileCheck --check-prefix=OLAND %s
|
||||
// RUN: %clang -### -target amdgcn -mcpu=pitcairn %s 2>&1 | FileCheck --check-prefix=PITCAIRN %s
|
||||
// RUN: %clang -### -target amdgcn -mcpu=verde %s 2>&1 | FileCheck --check-prefix=VERDE %s
|
||||
// RUN: %clang -### -target amdgcn -mcpu=hainan %s 2>&1 | FileCheck --check-prefix=GFX601 %s
|
||||
// RUN: %clang -### -target amdgcn -mcpu=oland %s 2>&1 | FileCheck --check-prefix=GFX601 %s
|
||||
// RUN: %clang -### -target amdgcn -mcpu=pitcairn %s 2>&1 | FileCheck --check-prefix=GFX601 %s
|
||||
// RUN: %clang -### -target amdgcn -mcpu=verde %s 2>&1 | FileCheck --check-prefix=GFX601 %s
|
||||
// RUN: %clang -### -target amdgcn -mcpu=gfx700 %s 2>&1 | FileCheck --check-prefix=GFX700 %s
|
||||
// RUN: %clang -### -target amdgcn -mcpu=kaveri %s 2>&1 | FileCheck --check-prefix=KAVERI %s
|
||||
// RUN: %clang -### -target amdgcn -mcpu=kaveri %s 2>&1 | FileCheck --check-prefix=GFX700 %s
|
||||
// RUN: %clang -### -target amdgcn -mcpu=gfx701 %s 2>&1 | FileCheck --check-prefix=GFX701 %s
|
||||
// RUN: %clang -### -target amdgcn -mcpu=hawaii %s 2>&1 | FileCheck --check-prefix=HAWAII %s
|
||||
// RUN: %clang -### -target amdgcn -mcpu=hawaii %s 2>&1 | FileCheck --check-prefix=GFX701 %s
|
||||
// RUN: %clang -### -target amdgcn -mcpu=gfx702 %s 2>&1 | FileCheck --check-prefix=GFX702 %s
|
||||
// RUN: %clang -### -target amdgcn -mcpu=gfx703 %s 2>&1 | FileCheck --check-prefix=GFX703 %s
|
||||
// RUN: %clang -### -target amdgcn -mcpu=kabini %s 2>&1 | FileCheck --check-prefix=KABINI %s
|
||||
// RUN: %clang -### -target amdgcn -mcpu=mullins %s 2>&1 | FileCheck --check-prefix=MULLINS %s
|
||||
// RUN: %clang -### -target amdgcn -mcpu=kabini %s 2>&1 | FileCheck --check-prefix=GFX703 %s
|
||||
// RUN: %clang -### -target amdgcn -mcpu=mullins %s 2>&1 | FileCheck --check-prefix=GFX703 %s
|
||||
// RUN: %clang -### -target amdgcn -mcpu=gfx704 %s 2>&1 | FileCheck --check-prefix=GFX704 %s
|
||||
// RUN: %clang -### -target amdgcn -mcpu=bonaire %s 2>&1 | FileCheck --check-prefix=BONAIRE %s
|
||||
// RUN: %clang -### -target amdgcn -mcpu=bonaire %s 2>&1 | FileCheck --check-prefix=GFX704 %s
|
||||
// RUN: %clang -### -target amdgcn -mcpu=gfx801 %s 2>&1 | FileCheck --check-prefix=GFX801 %s
|
||||
// RUN: %clang -### -target amdgcn -mcpu=carrizo %s 2>&1 | FileCheck --check-prefix=CARRIZO %s
|
||||
// RUN: %clang -### -target amdgcn -mcpu=carrizo %s 2>&1 | FileCheck --check-prefix=GFX801 %s
|
||||
// RUN: %clang -### -target amdgcn -mcpu=gfx802 %s 2>&1 | FileCheck --check-prefix=GFX802 %s
|
||||
// RUN: %clang -### -target amdgcn -mcpu=iceland %s 2>&1 | FileCheck --check-prefix=ICELAND %s
|
||||
// RUN: %clang -### -target amdgcn -mcpu=tonga %s 2>&1 | FileCheck --check-prefix=TONGA %s
|
||||
// RUN: %clang -### -target amdgcn -mcpu=iceland %s 2>&1 | FileCheck --check-prefix=GFX802 %s
|
||||
// RUN: %clang -### -target amdgcn -mcpu=tonga %s 2>&1 | FileCheck --check-prefix=GFX802 %s
|
||||
// RUN: %clang -### -target amdgcn -mcpu=gfx803 %s 2>&1 | FileCheck --check-prefix=GFX803 %s
|
||||
// RUN: %clang -### -target amdgcn -mcpu=fiji %s 2>&1 | FileCheck --check-prefix=FIJI %s
|
||||
// RUN: %clang -### -target amdgcn -mcpu=polaris10 %s 2>&1 | FileCheck --check-prefix=POLARIS10 %s
|
||||
// RUN: %clang -### -target amdgcn -mcpu=polaris11 %s 2>&1 | FileCheck --check-prefix=POLARIS11 %s
|
||||
// RUN: %clang -### -target amdgcn -mcpu=fiji %s 2>&1 | FileCheck --check-prefix=GFX803 %s
|
||||
// RUN: %clang -### -target amdgcn -mcpu=polaris10 %s 2>&1 | FileCheck --check-prefix=GFX803 %s
|
||||
// RUN: %clang -### -target amdgcn -mcpu=polaris11 %s 2>&1 | FileCheck --check-prefix=GFX803 %s
|
||||
// RUN: %clang -### -target amdgcn -mcpu=gfx810 %s 2>&1 | FileCheck --check-prefix=GFX810 %s
|
||||
// RUN: %clang -### -target amdgcn -mcpu=stoney %s 2>&1 | FileCheck --check-prefix=STONEY %s
|
||||
// RUN: %clang -### -target amdgcn -mcpu=stoney %s 2>&1 | FileCheck --check-prefix=GFX810 %s
|
||||
// RUN: %clang -### -target amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck --check-prefix=GFX900 %s
|
||||
// RUN: %clang -### -target amdgcn -mcpu=gfx902 %s 2>&1 | FileCheck --check-prefix=GFX902 %s
|
||||
// RUN: %clang -### -target amdgcn -mcpu=gfx904 %s 2>&1 | FileCheck --check-prefix=GFX904 %s
|
||||
@ -95,33 +95,16 @@
|
||||
|
||||
// GCNDEFAULT-NOT: -target-cpu
|
||||
// GFX600: "-target-cpu" "gfx600"
|
||||
// TAHITI: "-target-cpu" "tahiti"
|
||||
// GFX601: "-target-cpu" "gfx601"
|
||||
// HAINAN: "-target-cpu" "hainan"
|
||||
// OLAND: "-target-cpu" "oland"
|
||||
// PITCAIRN: "-target-cpu" "pitcairn"
|
||||
// VERDE: "-target-cpu" "verde"
|
||||
// GFX700: "-target-cpu" "gfx700"
|
||||
// KAVERI: "-target-cpu" "kaveri"
|
||||
// GFX701: "-target-cpu" "gfx701"
|
||||
// HAWAII: "-target-cpu" "hawaii"
|
||||
// GFX702: "-target-cpu" "gfx702"
|
||||
// GFX703: "-target-cpu" "gfx703"
|
||||
// KABINI: "-target-cpu" "kabini"
|
||||
// MULLINS: "-target-cpu" "mullins"
|
||||
// GFX704: "-target-cpu" "gfx704"
|
||||
// BONAIRE: "-target-cpu" "bonaire"
|
||||
// GFX801: "-target-cpu" "gfx801"
|
||||
// CARRIZO: "-target-cpu" "carrizo"
|
||||
// GFX802: "-target-cpu" "gfx802"
|
||||
// ICELAND: "-target-cpu" "iceland"
|
||||
// TONGA: "-target-cpu" "tonga"
|
||||
// GFX803: "-target-cpu" "gfx803"
|
||||
// FIJI: "-target-cpu" "fiji"
|
||||
// POLARIS10: "-target-cpu" "polaris10"
|
||||
// POLARIS11: "-target-cpu" "polaris11"
|
||||
// GFX810: "-target-cpu" "gfx810"
|
||||
// STONEY: "-target-cpu" "stoney"
|
||||
// GFX900: "-target-cpu" "gfx900"
|
||||
// GFX902: "-target-cpu" "gfx902"
|
||||
// GFX904: "-target-cpu" "gfx904"
|
||||
|
||||
70
clang/test/Driver/hip-invalid-target-id.hip
Normal file
70
clang/test/Driver/hip-invalid-target-id.hip
Normal file
@ -0,0 +1,70 @@
|
||||
// REQUIRES: clang-driver
|
||||
// REQUIRES: x86-registered-target
|
||||
// REQUIRES: amdgpu-registered-target
|
||||
|
||||
// RUN: %clang -### -target x86_64-linux-gnu \
|
||||
// RUN: -x hip --offload-arch=gfx908 \
|
||||
// RUN: --offload-arch=gfx908xnack \
|
||||
// RUN: --rocm-path=%S/Inputs/rocm \
|
||||
// RUN: %s 2>&1 | FileCheck -check-prefix=NOPLUS %s
|
||||
|
||||
// NOPLUS: error: Invalid target ID: gfx908xnack
|
||||
|
||||
// RUN: %clang -### -target x86_64-linux-gnu \
|
||||
// RUN: -x hip --offload-arch=gfx900 \
|
||||
// RUN: --offload-arch=gfx908:xnack+:xnack+ \
|
||||
// RUN: --rocm-path=%S/Inputs/rocm \
|
||||
// RUN: %s 2>&1 | FileCheck -check-prefix=ORDER %s
|
||||
|
||||
// ORDER: error: Invalid target ID: gfx908:xnack+:xnack+
|
||||
|
||||
// RUN: %clang -### -target x86_64-linux-gnu \
|
||||
// RUN: -x hip --offload-arch=gfx908 \
|
||||
// RUN: --offload-arch=gfx908:unknown+ \
|
||||
// RUN: --offload-arch=gfx908+sram-ecc+unknown \
|
||||
// RUN: --offload-arch=gfx900+xnack \
|
||||
// RUN: --rocm-path=%S/Inputs/rocm \
|
||||
// RUN: %s 2>&1 | FileCheck -check-prefix=UNK %s
|
||||
|
||||
// UNK: error: Invalid target ID: gfx908:unknown+
|
||||
|
||||
// RUN: %clang -### -target x86_64-linux-gnu \
|
||||
// RUN: -x hip --offload-arch=gfx908 \
|
||||
// RUN: --offload-arch=gfx908:sram-ecc+:unknown+ \
|
||||
// RUN: --offload-arch=gfx900+xnack \
|
||||
// RUN: --rocm-path=%S/Inputs/rocm \
|
||||
// RUN: %s 2>&1 | FileCheck -check-prefix=MIXED %s
|
||||
|
||||
// MIXED: error: Invalid target ID: gfx908:sram-ecc+:unknown+
|
||||
|
||||
// RUN: %clang -### -target x86_64-linux-gnu \
|
||||
// RUN: -x hip --offload-arch=gfx908 \
|
||||
// RUN: --offload-arch=gfx900:sram-ecc+ \
|
||||
// RUN: --rocm-path=%S/Inputs/rocm \
|
||||
// RUN: %s 2>&1 | FileCheck -check-prefix=UNSUP %s
|
||||
|
||||
// UNSUP: error: Invalid target ID: gfx900:sram-ecc+
|
||||
|
||||
/ RUN: %clang -### -target x86_64-linux-gnu \
|
||||
// RUN: -x hip --offload-arch=gfx908 \
|
||||
// RUN: --offload-arch=gfx900:xnack \
|
||||
// RUN: --rocm-path=%S/Inputs/rocm \
|
||||
// RUN: %s 2>&1 | FileCheck -check-prefix=NOSIGN %s
|
||||
|
||||
// NOSIGN: error: Invalid target ID: gfx900:xnack
|
||||
|
||||
// RUN: %clang -### -target x86_64-linux-gnu \
|
||||
// RUN: -x hip --offload-arch=gfx908 \
|
||||
// RUN: --offload-arch=gfx900+xnack \
|
||||
// RUN: --rocm-path=%S/Inputs/rocm \
|
||||
// RUN: %s 2>&1 | FileCheck -check-prefix=NOCOLON %s
|
||||
|
||||
// NOCOLON: error: Invalid target ID: gfx900+xnack
|
||||
|
||||
// RUN: not %clang -### -target x86_64-linux-gnu \
|
||||
// RUN: -x hip --offload-arch=gfx908 \
|
||||
// RUN: --offload-arch=gfx908:xnack+ \
|
||||
// RUN: --rocm-path=%S/Inputs/rocm \
|
||||
// RUN: %s 2>&1 | FileCheck -check-prefix=COMBO %s
|
||||
|
||||
// COMBO: error: Invalid offload arch combinations: gfx908 and gfx908:xnack+
|
||||
74
clang/test/Driver/hip-target-id.hip
Normal file
74
clang/test/Driver/hip-target-id.hip
Normal file
@ -0,0 +1,74 @@
|
||||
// REQUIRES: clang-driver
|
||||
// REQUIRES: x86-registered-target
|
||||
// REQUIRES: amdgpu-registered-target
|
||||
|
||||
// RUN: %clang -### -target x86_64-linux-gnu \
|
||||
// RUN: -x hip \
|
||||
// RUN: --offload-arch=gfx908:xnack+:sram-ecc+ \
|
||||
// RUN: --offload-arch=gfx908:xnack+:sram-ecc- \
|
||||
// RUN: --rocm-path=%S/Inputs/rocm \
|
||||
// RUN: %s 2>&1 | FileCheck %s
|
||||
|
||||
// RUN: %clang -### -target x86_64-linux-gnu \
|
||||
// RUN: -x hip \
|
||||
// RUN: --offload-arch=gfx908:xnack+:sram-ecc+ \
|
||||
// RUN: --offload-arch=gfx908:xnack+:sram-ecc- \
|
||||
// RUN: --rocm-path=%S/Inputs/rocm \
|
||||
// RUN: -save-temps \
|
||||
// RUN: %s 2>&1 | FileCheck --check-prefixes=CHECK,TMP %s
|
||||
|
||||
// RUN: %clang -### -target x86_64-linux-gnu \
|
||||
// RUN: -x hip \
|
||||
// RUN: --offload-arch=gfx908:xnack+:sram-ecc+ \
|
||||
// RUN: --offload-arch=gfx908:xnack+:sram-ecc- \
|
||||
// RUN: --rocm-path=%S/Inputs/rocm \
|
||||
// RUN: -fgpu-rdc \
|
||||
// RUN: %s 2>&1 | FileCheck --check-prefixes=CHECK %s
|
||||
|
||||
// CHECK: [[CLANG:"[^"]*clang[^"]*"]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
|
||||
// CHECK-SAME: "-target-cpu" "gfx908"
|
||||
// CHECK-SAME: "-target-feature" "+sram-ecc"
|
||||
// CHECK-SAME: "-target-feature" "+xnack"
|
||||
|
||||
// TMP: [[CLANG:"[^"]*clang[^"]*"]] "-cc1as" "-triple" "amdgcn-amd-amdhsa"
|
||||
// TMP-SAME: "-target-cpu" "gfx908"
|
||||
// TMP-SAME: "-target-feature" "+sram-ecc"
|
||||
// TMP-SAME: "-target-feature" "+xnack"
|
||||
|
||||
// CHECK: [[LLD:"[^"]*lld[^"]*"]]
|
||||
// CHECK-SAME: "-plugin-opt=mcpu=gfx908"
|
||||
// CHECK-SAME: "-plugin-opt=-mattr=+sram-ecc,+xnack"
|
||||
|
||||
// CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
|
||||
// CHECK-SAME: "-target-cpu" "gfx908"
|
||||
// CHECK-SAME: "-target-feature" "-sram-ecc"
|
||||
// CHECK-SAME: "-target-feature" "+xnack"
|
||||
|
||||
// CHECK: [[LLD]]
|
||||
// CHECK-SAME: "-plugin-opt=mcpu=gfx908"
|
||||
// CHECK-SAME: "-plugin-opt=-mattr=-sram-ecc,+xnack"
|
||||
|
||||
// CHECK: {{"[^"]*clang-offload-bundler[^"]*"}}
|
||||
// CHECK-SAME: "-targets=host-x86_64-unknown-linux,hip-amdgcn-amd-amdhsa-gfx908:sram-ecc+:xnack+,hip-amdgcn-amd-amdhsa-gfx908:sram-ecc-:xnack+"
|
||||
|
||||
// Check canonicalization and repeating of target ID.
|
||||
|
||||
// RUN: %clang -### -target x86_64-linux-gnu \
|
||||
// RUN: -x hip \
|
||||
// RUN: --offload-arch=fiji \
|
||||
// RUN: --offload-arch=gfx803 \
|
||||
// RUN: --offload-arch=fiji \
|
||||
// RUN: --rocm-path=%S/Inputs/rocm \
|
||||
// RUN: %s 2>&1 | FileCheck -check-prefix=FIJI %s
|
||||
// FIJI: "-targets=host-x86_64-unknown-linux,hip-amdgcn-amd-amdhsa-gfx803"
|
||||
|
||||
// RUN: %clang -### -target x86_64-linux-gnu \
|
||||
// RUN: -x hip \
|
||||
// RUN: --offload-arch=gfx900:xnack- \
|
||||
// RUN: --offload-arch=gfx900:xnack+ \
|
||||
// RUN: --offload-arch=gfx908:sram-ecc+ \
|
||||
// RUN: --offload-arch=gfx908:sram-ecc- \
|
||||
// RUN: --offload-arch=gfx906 \
|
||||
// RUN: --rocm-path=%S/Inputs/rocm \
|
||||
// RUN: %s 2>&1 | FileCheck -check-prefix=MULTI %s
|
||||
// MULTI: "-targets=host-x86_64-unknown-linux,hip-amdgcn-amd-amdhsa-gfx900:xnack+,hip-amdgcn-amd-amdhsa-gfx900:xnack-,hip-amdgcn-amd-amdhsa-gfx906,hip-amdgcn-amd-amdhsa-gfx908:sram-ecc+,hip-amdgcn-amd-amdhsa-gfx908:sram-ecc-"
|
||||
@ -3,44 +3,34 @@
|
||||
// REQUIRES: amdgpu-registered-target
|
||||
|
||||
// RUN: %clang -### -target x86_64-linux-gnu -fgpu-rdc -nogpulib \
|
||||
// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
|
||||
// RUN: -mxnack 2>&1 | FileCheck %s -check-prefix=XNACK
|
||||
// RUN: --cuda-gpu-arch=gfx906:xnack+ --cuda-gpu-arch=gfx900:xnack+ %s \
|
||||
// RUN: 2>&1 | FileCheck %s -check-prefix=XNACK
|
||||
// RUN: %clang -### -target x86_64-linux-gnu -fgpu-rdc -nogpulib \
|
||||
// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
|
||||
// RUN: -mno-xnack 2>&1 | FileCheck %s -check-prefix=NOXNACK
|
||||
// RUN: --cuda-gpu-arch=gfx906:xnack- --cuda-gpu-arch=gfx900:xnack- %s \
|
||||
// RUN: 2>&1 | FileCheck %s -check-prefix=NOXNACK
|
||||
|
||||
// XNACK: {{.*}}clang{{.*}}"-target-feature" "+xnack"
|
||||
// XNACK: {{.*}}lld{{.*}}"-plugin-opt=-mattr=+xnack"
|
||||
// NOXNACK: {{.*}}clang{{.*}}"-target-feature" "-xnack"
|
||||
// NOXNACK: {{.*}}lld{{.*}}"-plugin-opt=-mattr=-xnack"
|
||||
|
||||
|
||||
// RUN: %clang -### -target x86_64-linux-gnu -fgpu-rdc -nogpulib \
|
||||
// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
|
||||
// RUN: -msram-ecc 2>&1 | FileCheck %s -check-prefix=SRAM
|
||||
// RUN: --cuda-gpu-arch=gfx908:sram-ecc+ %s \
|
||||
// RUN: 2>&1 | FileCheck %s -check-prefix=SRAM
|
||||
// RUN: %clang -### -target x86_64-linux-gnu -fgpu-rdc -nogpulib \
|
||||
// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
|
||||
// RUN: -mno-sram-ecc 2>&1 | FileCheck %s -check-prefix=NOSRAM
|
||||
// RUN: --cuda-gpu-arch=gfx908:sram-ecc- %s \
|
||||
// RUN: 2>&1 | FileCheck %s -check-prefix=NOSRAM
|
||||
|
||||
// SRAM: {{.*}}clang{{.*}}"-target-feature" "+sram-ecc"
|
||||
// SRAM: {{.*}}lld{{.*}}"-plugin-opt=-mattr=+sram-ecc"
|
||||
// NOSRAM: {{.*}}clang{{.*}}"-target-feature" "-sram-ecc"
|
||||
// NOSRAM: {{.*}}lld{{.*}}"-plugin-opt=-mattr=-sram-ecc"
|
||||
|
||||
|
||||
// RUN: %clang -### -target x86_64-linux-gnu -fgpu-rdc -nogpulib \
|
||||
// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
|
||||
// RUN: -mxnack -msram-ecc \
|
||||
// RUN: --cuda-gpu-arch=gfx908:xnack+:sram-ecc+ %s \
|
||||
// RUN: 2>&1 | FileCheck %s -check-prefix=ALL3
|
||||
// RUN: %clang -### -target x86_64-linux-gnu -fgpu-rdc -nogpulib \
|
||||
// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
|
||||
// RUN: -mno-xnack -mno-sram-ecc \
|
||||
// RUN: --cuda-gpu-arch=gfx908:xnack-:sram-ecc- %s \
|
||||
// RUN: 2>&1 | FileCheck %s -check-prefix=NOALL3
|
||||
|
||||
// ALL3: {{.*}}clang{{.*}}"-target-feature" "+xnack" "-target-feature" "+sram-ecc"
|
||||
// ALL3: {{.*}}lld{{.*}}"-plugin-opt=-mattr=+xnack,+sram-ecc"
|
||||
// NOALL3: {{.*}}clang{{.*}}"-target-feature" "-xnack" "-target-feature" "-sram-ecc"
|
||||
// NOALL3: {{.*}}lld{{.*}}"-plugin-opt=-mattr=-xnack,-sram-ecc"
|
||||
// ALL3: {{.*}}clang{{.*}}"-target-feature" "+sram-ecc" "-target-feature" "+xnack"
|
||||
// NOALL3: {{.*}}clang{{.*}}"-target-feature" "-sram-ecc" "-target-feature" "-xnack"
|
||||
|
||||
// RUN: %clang -### -target x86_64-linux-gnu -fgpu-rdc -nogpulib \
|
||||
// RUN: --cuda-gpu-arch=gfx1010 %s \
|
||||
|
||||
45
clang/test/Driver/invalid-target-id.cl
Normal file
45
clang/test/Driver/invalid-target-id.cl
Normal file
@ -0,0 +1,45 @@
|
||||
// REQUIRES: clang-driver
|
||||
// REQUIRES: x86-registered-target
|
||||
// REQUIRES: amdgpu-registered-target
|
||||
|
||||
// RUN: not %clang -target amdgcn-amd-amdhsa \
|
||||
// RUN: -mcpu=gfx908xnack -nostdlib \
|
||||
// RUN: %s 2>&1 | FileCheck -check-prefix=NOPLUS %s
|
||||
|
||||
// NOPLUS: error: Invalid target ID: gfx908xnack
|
||||
|
||||
// RUN: not %clang -target amdgcn-amd-amdpal \
|
||||
// RUN: -mcpu=gfx908:xnack+:xnack+ -nostdlib \
|
||||
// RUN: %s 2>&1 | FileCheck -check-prefix=ORDER %s
|
||||
|
||||
// ORDER: error: Invalid target ID: gfx908:xnack+:xnack+
|
||||
|
||||
// RUN: not %clang -target amdgcn--mesa3d \
|
||||
// RUN: -mcpu=gfx908:unknown+ -nostdlib \
|
||||
// RUN: %s 2>&1 | FileCheck -check-prefix=UNK %s
|
||||
|
||||
// UNK: error: Invalid target ID: gfx908:unknown+
|
||||
|
||||
// RUN: not %clang -target amdgcn-amd-amdhsa \
|
||||
// RUN: -mcpu=gfx908:sram-ecc+:unknown+ -nostdlib \
|
||||
// RUN: %s 2>&1 | FileCheck -check-prefix=MIXED %s
|
||||
|
||||
// MIXED: error: Invalid target ID: gfx908:sram-ecc+:unknown+
|
||||
|
||||
// RUN: not %clang -target amdgcn-amd-amdhsa \
|
||||
// RUN: -mcpu=gfx900:sram-ecc+ -nostdlib \
|
||||
// RUN: %s 2>&1 | FileCheck -check-prefix=UNSUP %s
|
||||
|
||||
// UNSUP: error: Invalid target ID: gfx900:sram-ecc+
|
||||
|
||||
// RUN: not %clang -target amdgcn-amd-amdhsa \
|
||||
// RUN: -mcpu=gfx900:xnack -nostdlib \
|
||||
// RUN: %s 2>&1 | FileCheck -check-prefix=NOSIGN %s
|
||||
|
||||
// NOSIGN: error: Invalid target ID: gfx900:xnack
|
||||
|
||||
// RUN: not %clang -target amdgcn-amd-amdhsa \
|
||||
// RUN: -mcpu=gfx900+xnack -nostdlib \
|
||||
// RUN: %s 2>&1 | FileCheck -check-prefix=NOCOLON %s
|
||||
|
||||
// NOCOLON: error: Invalid target ID: gfx900+xnack
|
||||
38
clang/test/Driver/target-id-macros.cl
Normal file
38
clang/test/Driver/target-id-macros.cl
Normal file
@ -0,0 +1,38 @@
|
||||
// REQUIRES: clang-driver
|
||||
// REQUIRES: x86-registered-target
|
||||
// REQUIRES: amdgpu-registered-target
|
||||
|
||||
// RUN: %clang -E -dM -target amdgcn-amd-amdhsa \
|
||||
// RUN: -mcpu=gfx908:xnack+:sram-ecc- -nogpulib -o - %s 2>&1 \
|
||||
// RUN: | FileCheck -check-prefixes=PROC,ID1 %s
|
||||
|
||||
// RUN: %clang -E -dM -target amdgcn-amd-amdpal \
|
||||
// RUN: -mcpu=gfx908:xnack+:sram-ecc- -nogpulib -o - %s 2>&1 \
|
||||
// RUN: | FileCheck -check-prefixes=PROC,ID1 %s
|
||||
|
||||
// RUN: %clang -E -dM -target amdgcn--mesa3d \
|
||||
// RUN: -mcpu=gfx908:xnack+:sram-ecc- -nogpulib -o - %s 2>&1 \
|
||||
// RUN: | FileCheck -check-prefixes=PROC,ID1 %s
|
||||
|
||||
// RUN: %clang -E -dM -target amdgcn-amd-amdhsa \
|
||||
// RUN: -mcpu=gfx908 -nogpulib -o - %s 2>&1 \
|
||||
// RUN: | FileCheck -check-prefixes=PROC,ID2 %s
|
||||
|
||||
// RUN: %clang -E -dM -target amdgcn-amd-amdhsa \
|
||||
// RUN: -nogpulib -o - %s 2>&1 \
|
||||
// RUN: | FileCheck -check-prefixes=NONE %s
|
||||
|
||||
// PROC-DAG: #define __amdgcn_processor__ "gfx908"
|
||||
|
||||
// ID1-DAG: #define __amdgcn_feature_xnack__ 1
|
||||
// ID1-DAG: #define __amdgcn_feature_sram_ecc__ 0
|
||||
// ID1-DAG: #define __amdgcn_target_id__ "gfx908:sram-ecc-:xnack+"
|
||||
|
||||
// ID2-DAG: #define __amdgcn_target_id__ "gfx908"
|
||||
// ID2-NOT: #define __amdgcn_feature_xnack__
|
||||
// ID2-NOT: #define __amdgcn_feature_sram_ecc__
|
||||
|
||||
// NONE-NOT: #define __amdgcn_processor__
|
||||
// NONE-NOT: #define __amdgcn_feature_xnack__
|
||||
// NONE-NOT: #define __amdgcn_feature_sram_ecc__
|
||||
// NONE-NOT: #define __amdgcn_target_id__
|
||||
12
clang/test/Driver/target-id-macros.hip
Normal file
12
clang/test/Driver/target-id-macros.hip
Normal file
@ -0,0 +1,12 @@
|
||||
// REQUIRES: clang-driver
|
||||
// REQUIRES: x86-registered-target
|
||||
// REQUIRES: amdgpu-registered-target
|
||||
|
||||
// RUN: %clang -E -dM -target x86_64-linux-gnu --cuda-device-only \
|
||||
// RUN: --offload-arch=gfx908:xnack+:sram-ecc- -nogpuinc -nogpulib \
|
||||
// RUN: -o - %s 2>&1 | FileCheck %s
|
||||
|
||||
// CHECK-DAG: #define __amdgcn_processor__ "gfx908"
|
||||
// CHECK-DAG: #define __amdgcn_feature_xnack__ 1
|
||||
// CHECK-DAG: #define __amdgcn_feature_sram_ecc__ 0
|
||||
// CHECK-DAG: #define __amdgcn_target_id__ "gfx908:sram-ecc-:xnack+"
|
||||
33
clang/test/Driver/target-id.cl
Normal file
33
clang/test/Driver/target-id.cl
Normal file
@ -0,0 +1,33 @@
|
||||
// REQUIRES: clang-driver
|
||||
// REQUIRES: x86-registered-target
|
||||
// REQUIRES: amdgpu-registered-target
|
||||
|
||||
// RUN: %clang -### -target amdgcn-amd-amdhsa \
|
||||
// RUN: -mcpu=gfx908:xnack+:sram-ecc- \
|
||||
// RUN: -nostdlib %s 2>&1 | FileCheck %s
|
||||
|
||||
// RUN: %clang -### -target amdgcn-amd-amdhsa \
|
||||
// RUN: -mcpu=gfx908:xnack+:sram-ecc- \
|
||||
// RUN: -nostdlib -x ir %s 2>&1 | FileCheck %s
|
||||
|
||||
// RUN: %clang -### -target amdgcn-amd-amdhsa \
|
||||
// RUN: -mcpu=gfx908:xnack+:sram-ecc- \
|
||||
// RUN: -nostdlib -x assembler %s 2>&1 | FileCheck %s
|
||||
|
||||
// RUN: %clang -### -target amdgcn-amd-amdpal \
|
||||
// RUN: -mcpu=gfx908:xnack+:sram-ecc- \
|
||||
// RUN: -nostdlib %s 2>&1 | FileCheck %s
|
||||
|
||||
// RUN: %clang -### -target amdgcn--mesa3d \
|
||||
// RUN: -mcpu=gfx908:xnack+:sram-ecc- \
|
||||
// RUN: -nostdlib %s 2>&1 | FileCheck %s
|
||||
|
||||
// RUN: %clang -### -target amdgcn-amd-amdhsa \
|
||||
// RUN: -nostdlib %s 2>&1 | FileCheck -check-prefix=NONE %s
|
||||
|
||||
// CHECK: "-target-cpu" "gfx908"
|
||||
// CHECK-SAME: "-target-feature" "-sram-ecc"
|
||||
// CHECK-SAME: "-target-feature" "+xnack"
|
||||
|
||||
// NONE-NOT: "-target-cpu"
|
||||
// NONE-NOT: "-target-feature"
|
||||
@ -113,12 +113,18 @@ enum ArchFeatureKind : uint32_t {
|
||||
FEATURE_FAST_DENORMAL_F32 = 1 << 5,
|
||||
|
||||
// Wavefront 32 is available.
|
||||
FEATURE_WAVE32 = 1 << 6
|
||||
FEATURE_WAVE32 = 1 << 6,
|
||||
|
||||
// Xnack is available.
|
||||
FEATURE_XNACK = 1 << 7,
|
||||
|
||||
// Sram-ecc is available.
|
||||
FEATURE_SRAM_ECC = 1 << 8,
|
||||
};
|
||||
|
||||
StringRef getArchNameAMDGCN(GPUKind AK);
|
||||
StringRef getArchNameR600(GPUKind AK);
|
||||
StringRef getCanonicalArchName(StringRef Arch);
|
||||
StringRef getCanonicalArchName(const Triple &T, StringRef Arch);
|
||||
GPUKind parseArchAMDGCN(StringRef CPU);
|
||||
GPUKind parseArchR600(StringRef CPU);
|
||||
unsigned getArchAttrAMDGCN(GPUKind AK);
|
||||
|
||||
@ -83,26 +83,26 @@ constexpr GPUInfo AMDGCNGPUs[39] = {
|
||||
{{"mullins"}, {"gfx703"}, GK_GFX703, FEATURE_NONE},
|
||||
{{"gfx704"}, {"gfx704"}, GK_GFX704, FEATURE_NONE},
|
||||
{{"bonaire"}, {"gfx704"}, GK_GFX704, FEATURE_NONE},
|
||||
{{"gfx801"}, {"gfx801"}, GK_GFX801, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
|
||||
{{"carrizo"}, {"gfx801"}, GK_GFX801, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
|
||||
{{"gfx802"}, {"gfx802"}, GK_GFX802, FEATURE_FAST_DENORMAL_F32},
|
||||
{{"iceland"}, {"gfx802"}, GK_GFX802, FEATURE_FAST_DENORMAL_F32},
|
||||
{{"tonga"}, {"gfx802"}, GK_GFX802, FEATURE_FAST_DENORMAL_F32},
|
||||
{{"gfx803"}, {"gfx803"}, GK_GFX803, FEATURE_FAST_DENORMAL_F32},
|
||||
{{"fiji"}, {"gfx803"}, GK_GFX803, FEATURE_FAST_DENORMAL_F32},
|
||||
{{"polaris10"}, {"gfx803"}, GK_GFX803, FEATURE_FAST_DENORMAL_F32},
|
||||
{{"polaris11"}, {"gfx803"}, GK_GFX803, FEATURE_FAST_DENORMAL_F32},
|
||||
{{"gfx810"}, {"gfx810"}, GK_GFX810, FEATURE_FAST_DENORMAL_F32},
|
||||
{{"stoney"}, {"gfx810"}, GK_GFX810, FEATURE_FAST_DENORMAL_F32},
|
||||
{{"gfx900"}, {"gfx900"}, GK_GFX900, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
|
||||
{{"gfx902"}, {"gfx902"}, GK_GFX902, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
|
||||
{{"gfx904"}, {"gfx904"}, GK_GFX904, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
|
||||
{{"gfx906"}, {"gfx906"}, GK_GFX906, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
|
||||
{{"gfx908"}, {"gfx908"}, GK_GFX908, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
|
||||
{{"gfx909"}, {"gfx909"}, GK_GFX909, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
|
||||
{{"gfx1010"}, {"gfx1010"}, GK_GFX1010, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32},
|
||||
{{"gfx1011"}, {"gfx1011"}, GK_GFX1011, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32},
|
||||
{{"gfx1012"}, {"gfx1012"}, GK_GFX1012, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32},
|
||||
{{"gfx801"}, {"gfx801"}, GK_GFX801, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
|
||||
{{"carrizo"}, {"gfx801"}, GK_GFX801, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
|
||||
{{"gfx802"}, {"gfx802"}, GK_GFX802, FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
|
||||
{{"iceland"}, {"gfx802"}, GK_GFX802, FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
|
||||
{{"tonga"}, {"gfx802"}, GK_GFX802, FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
|
||||
{{"gfx803"}, {"gfx803"}, GK_GFX803, FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
|
||||
{{"fiji"}, {"gfx803"}, GK_GFX803, FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
|
||||
{{"polaris10"}, {"gfx803"}, GK_GFX803, FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
|
||||
{{"polaris11"}, {"gfx803"}, GK_GFX803, FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
|
||||
{{"gfx810"}, {"gfx810"}, GK_GFX810, FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
|
||||
{{"stoney"}, {"gfx810"}, GK_GFX810, FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
|
||||
{{"gfx900"}, {"gfx900"}, GK_GFX900, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
|
||||
{{"gfx902"}, {"gfx902"}, GK_GFX902, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
|
||||
{{"gfx904"}, {"gfx904"}, GK_GFX904, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
|
||||
{{"gfx906"}, {"gfx906"}, GK_GFX906, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAM_ECC},
|
||||
{{"gfx908"}, {"gfx908"}, GK_GFX908, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAM_ECC},
|
||||
{{"gfx909"}, {"gfx909"}, GK_GFX909, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
|
||||
{{"gfx1010"}, {"gfx1010"}, GK_GFX1010, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK},
|
||||
{{"gfx1011"}, {"gfx1011"}, GK_GFX1011, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK},
|
||||
{{"gfx1012"}, {"gfx1012"}, GK_GFX1012, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK},
|
||||
{{"gfx1030"}, {"gfx1030"}, GK_GFX1030, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32},
|
||||
{{"gfx1031"}, {"gfx1031"}, GK_GFX1031, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32},
|
||||
};
|
||||
@ -212,6 +212,15 @@ AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) {
|
||||
}
|
||||
}
|
||||
|
||||
StringRef AMDGPU::getCanonicalArchName(const Triple &T, StringRef Arch) {
|
||||
assert(T.isAMDGPU());
|
||||
auto ProcKind = T.isAMDGCN() ? parseArchAMDGCN(Arch) : parseArchR600(Arch);
|
||||
if (ProcKind == GK_NONE)
|
||||
return StringRef();
|
||||
|
||||
return T.isAMDGCN() ? getArchNameAMDGCN(ProcKind) : getArchNameR600(ProcKind);
|
||||
}
|
||||
|
||||
namespace llvm {
|
||||
namespace RISCV {
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user