Yaxun (Sam) Liu a6786cdd57 [HIPSPV][3/4] Enable SPIR-V emission for HIP
This patch enables SPIR-V binary emission for HIP device code via the
HIPSPV tool chain.

The '--offload' option, envisioned in [1], is added for specifying
offload targets. It overrides the default device target
(amdgcn-amd-amdhsa) for HIP compilation so that device code is emitted
as a SPIR-V binary. The option is handled in getHIPOffloadTargetTriple().
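
For example (an illustrative invocation; the source file name is made
up, and flags such as -nogpulib/-nogpuinc only sidestep a ROCm
installation and may not be needed in every setup):

  clang++ -x hip --offload=spirv64 -nogpulib -nogpuinc -c saxpy.hip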

A getOffloadingDeviceToolChain() function (based on the design in the
SYCL repository) is added to select the HIPSPVToolChain when the HIP
offload target is 'spirv64'.

The HIPActionBuilder is modified to produce LLVM IR at the backend
phase. The HIPSPV tool chain expects to receive HIP device code as LLVM
IR so that it can run external LLVM passes over it; the HIPSPV tool
chain is also responsible for emitting the SPIR-V binary.

A CUDA GPU architecture 'generic' is added. The name is taken from the
LLVM SPIR-V backend. In the HIPSPV code path the architecture name is
inserted into the bundle entry ID as the target ID. The target ID is
expected to always be present so that a component of the target triple
is not mistaken for a target ID.
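
For illustration, bundle entry IDs are roughly of the form
<offload kind>-<target triple>-<target id> (for example,
hip-amdgcn-amd-amdhsa--gfx906 on the default HIP path; the exact
string depends on triple normalization). Always emitting a target ID
('generic' for spirv64) keeps that last field unambiguous.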

Tests are added for checking the HIPSPV tool chain.

[1]: https://lists.llvm.org/pipermail/cfe-dev/2020-December/067362.html

Patch by: Henry Linjamäki

Reviewed by: Yaxun Liu, Artem Belevich, Alexey Bader

Differential Revision: https://reviews.llvm.org/D110622
2021-12-20 10:45:09 -05:00

#include "clang/Basic/Cuda.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/VersionTuple.h"
namespace clang {
const char *CudaVersionToString(CudaVersion V) {
  switch (V) {
  case CudaVersion::UNKNOWN:
    return "unknown";
  case CudaVersion::CUDA_70:
    return "7.0";
  case CudaVersion::CUDA_75:
    return "7.5";
  case CudaVersion::CUDA_80:
    return "8.0";
  case CudaVersion::CUDA_90:
    return "9.0";
  case CudaVersion::CUDA_91:
    return "9.1";
  case CudaVersion::CUDA_92:
    return "9.2";
  case CudaVersion::CUDA_100:
    return "10.0";
  case CudaVersion::CUDA_101:
    return "10.1";
  case CudaVersion::CUDA_102:
    return "10.2";
  case CudaVersion::CUDA_110:
    return "11.0";
  case CudaVersion::CUDA_111:
    return "11.1";
  case CudaVersion::CUDA_112:
    return "11.2";
  case CudaVersion::CUDA_113:
    return "11.3";
  case CudaVersion::CUDA_114:
    return "11.4";
  case CudaVersion::CUDA_115:
    return "11.5";
  case CudaVersion::NEW:
    return "";
  }
  llvm_unreachable("invalid enum");
}

CudaVersion CudaStringToVersion(const llvm::Twine &S) {
  return llvm::StringSwitch<CudaVersion>(S.str())
      .Case("7.0", CudaVersion::CUDA_70)
      .Case("7.5", CudaVersion::CUDA_75)
      .Case("8.0", CudaVersion::CUDA_80)
      .Case("9.0", CudaVersion::CUDA_90)
      .Case("9.1", CudaVersion::CUDA_91)
      .Case("9.2", CudaVersion::CUDA_92)
      .Case("10.0", CudaVersion::CUDA_100)
      .Case("10.1", CudaVersion::CUDA_101)
      .Case("10.2", CudaVersion::CUDA_102)
      .Case("11.0", CudaVersion::CUDA_110)
      .Case("11.1", CudaVersion::CUDA_111)
      .Case("11.2", CudaVersion::CUDA_112)
      .Case("11.3", CudaVersion::CUDA_113)
      .Case("11.4", CudaVersion::CUDA_114)
      .Case("11.5", CudaVersion::CUDA_115)
      .Default(CudaVersion::UNKNOWN);
}

namespace {
struct CudaArchToStringMap {
  CudaArch arch;
  const char *arch_name;
  const char *virtual_arch_name;
};
} // namespace

#define SM2(sm, ca) \
  { CudaArch::SM_##sm, "sm_" #sm, ca }
#define SM(sm) SM2(sm, "compute_" #sm)
#define GFX(gpu) \
  { CudaArch::GFX##gpu, "gfx" #gpu, "compute_amdgcn" }
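// For example, SM(75) expands to {CudaArch::SM_75, "sm_75", "compute_75"}
// and GFX(906) expands to {CudaArch::GFX906, "gfx906", "compute_amdgcn"}.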
static const CudaArchToStringMap arch_names[] = {
    // clang-format off
    {CudaArch::UNUSED, "", ""},
    SM2(20, "compute_20"), SM2(21, "compute_20"),  // Fermi
    SM(30), SM(32), SM(35), SM(37),                // Kepler
    SM(50), SM(52), SM(53),                        // Maxwell
    SM(60), SM(61), SM(62),                        // Pascal
    SM(70), SM(72),                                // Volta
    SM(75),                                        // Turing
    SM(80), SM(86),                                // Ampere
    GFX(600),  // gfx600
    GFX(601),  // gfx601
    GFX(602),  // gfx602
    GFX(700),  // gfx700
    GFX(701),  // gfx701
    GFX(702),  // gfx702
    GFX(703),  // gfx703
    GFX(704),  // gfx704
    GFX(705),  // gfx705
    GFX(801),  // gfx801
    GFX(802),  // gfx802
    GFX(803),  // gfx803
    GFX(805),  // gfx805
    GFX(810),  // gfx810
    GFX(900),  // gfx900
    GFX(902),  // gfx902
    GFX(904),  // gfx904
    GFX(906),  // gfx906
    GFX(908),  // gfx908
    GFX(909),  // gfx909
    GFX(90a),  // gfx90a
    GFX(90c),  // gfx90c
    GFX(1010), // gfx1010
    GFX(1011), // gfx1011
    GFX(1012), // gfx1012
    GFX(1013), // gfx1013
    GFX(1030), // gfx1030
    GFX(1031), // gfx1031
    GFX(1032), // gfx1032
    GFX(1033), // gfx1033
    GFX(1034), // gfx1034
    GFX(1035), // gfx1035
    {CudaArch::Generic, "generic", ""}, // Target ID for the HIPSPV (SPIR-V) path.
    // clang-format on
};
#undef SM
#undef SM2
#undef GFX

const char *CudaArchToString(CudaArch A) {
  auto result = std::find_if(
      std::begin(arch_names), std::end(arch_names),
      [A](const CudaArchToStringMap &map) { return A == map.arch; });
  if (result == std::end(arch_names))
    return "unknown";
  return result->arch_name;
}

const char *CudaArchToVirtualArchString(CudaArch A) {
  auto result = std::find_if(
      std::begin(arch_names), std::end(arch_names),
      [A](const CudaArchToStringMap &map) { return A == map.arch; });
  if (result == std::end(arch_names))
    return "unknown";
  return result->virtual_arch_name;
}

CudaArch StringToCudaArch(llvm::StringRef S) {
  auto result = std::find_if(
      std::begin(arch_names), std::end(arch_names),
      [S](const CudaArchToStringMap &map) { return S == map.arch_name; });
  if (result == std::end(arch_names))
    return CudaArch::UNKNOWN;
  return result->arch;
}

CudaVersion MinVersionForCudaArch(CudaArch A) {
  if (A == CudaArch::UNKNOWN)
    return CudaVersion::UNKNOWN;

  // AMD GPUs do not depend on CUDA versions.
  if (IsAMDGpuArch(A))
    return CudaVersion::CUDA_70;

  switch (A) {
  case CudaArch::SM_20:
  case CudaArch::SM_21:
  case CudaArch::SM_30:
  case CudaArch::SM_32:
  case CudaArch::SM_35:
  case CudaArch::SM_37:
  case CudaArch::SM_50:
  case CudaArch::SM_52:
  case CudaArch::SM_53:
    return CudaVersion::CUDA_70;
  case CudaArch::SM_60:
  case CudaArch::SM_61:
  case CudaArch::SM_62:
    return CudaVersion::CUDA_80;
  case CudaArch::SM_70:
    return CudaVersion::CUDA_90;
  case CudaArch::SM_72:
    return CudaVersion::CUDA_91;
  case CudaArch::SM_75:
    return CudaVersion::CUDA_100;
  case CudaArch::SM_80:
    return CudaVersion::CUDA_110;
  case CudaArch::SM_86:
    return CudaVersion::CUDA_111;
  default:
    llvm_unreachable("invalid enum");
  }
}

CudaVersion MaxVersionForCudaArch(CudaArch A) {
  // AMD GPUs do not depend on CUDA versions.
  if (IsAMDGpuArch(A))
    return CudaVersion::NEW;

  switch (A) {
  case CudaArch::UNKNOWN:
    return CudaVersion::UNKNOWN;
  case CudaArch::SM_20:
  case CudaArch::SM_21:
    return CudaVersion::CUDA_80;
  case CudaArch::SM_30:
    return CudaVersion::CUDA_110;
  default:
    return CudaVersion::NEW;
  }
}
CudaVersion ToCudaVersion(llvm::VersionTuple Version) {
  // Flatten major.minor into a single integer, e.g. 11.4 becomes 114.
  int IVer =
      Version.getMajor() * 10 + Version.getMinor().getValueOr(0);
  switch (IVer) {
  case 70:
    return CudaVersion::CUDA_70;
  case 75:
    return CudaVersion::CUDA_75;
  case 80:
    return CudaVersion::CUDA_80;
  case 90:
    return CudaVersion::CUDA_90;
  case 91:
    return CudaVersion::CUDA_91;
  case 92:
    return CudaVersion::CUDA_92;
  case 100:
    return CudaVersion::CUDA_100;
  case 101:
    return CudaVersion::CUDA_101;
  case 102:
    return CudaVersion::CUDA_102;
  case 110:
    return CudaVersion::CUDA_110;
  case 111:
    return CudaVersion::CUDA_111;
  case 112:
    return CudaVersion::CUDA_112;
  case 113:
    return CudaVersion::CUDA_113;
  case 114:
    return CudaVersion::CUDA_114;
  case 115:
    return CudaVersion::CUDA_115;
  default:
    return CudaVersion::UNKNOWN;
  }
}

bool CudaFeatureEnabled(llvm::VersionTuple Version, CudaFeature Feature) {
  return CudaFeatureEnabled(ToCudaVersion(Version), Feature);
}

bool CudaFeatureEnabled(CudaVersion Version, CudaFeature Feature) {
  switch (Feature) {
  case CudaFeature::CUDA_USES_NEW_LAUNCH:
    return Version >= CudaVersion::CUDA_92;
  case CudaFeature::CUDA_USES_FATBIN_REGISTER_END:
    return Version >= CudaVersion::CUDA_101;
  }
  llvm_unreachable("Unknown CUDA feature.");
}

} // namespace clang
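
A minimal usage sketch of the helpers above (illustrative only; this is
not part of Cuda.cpp and simply assumes the declarations from
clang/Basic/Cuda.h are available):

#include "clang/Basic/Cuda.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/VersionTuple.h"
#include <cassert>

int main() {
  using namespace clang;
  // The new 'generic' architecture round-trips through the lookup table.
  assert(StringToCudaArch("generic") == CudaArch::Generic);
  assert(llvm::StringRef(CudaArchToString(CudaArch::Generic)) == "generic");
  // Version helpers: 11.4 flattens to CUDA_114; sm_80 needs at least CUDA 11.0.
  assert(ToCudaVersion(llvm::VersionTuple(11, 4)) == CudaVersion::CUDA_114);
  assert(MinVersionForCudaArch(CudaArch::SM_80) == CudaVersion::CUDA_110);
  return 0;
}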