[AMDGPU] Remove DX10_CLAMP and IEEE bits from gfx1170 (#182107)
Add `DX10ClampAndIEEEMode` feature and set it for every subtarget prior to gfx1170
This commit is contained in:
parent
de5e081a83
commit
d0f50d5574
@ -1,19 +1,24 @@
|
||||
// REQUIRES: amdgpu-registered-target
|
||||
//
|
||||
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -O0 -emit-llvm -o - %s \
|
||||
// RUN: -target-feature +dx10-clamp-and-ieee-mode \
|
||||
// RUN: | FileCheck -check-prefixes=COMMON,ON %s
|
||||
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -O0 -emit-llvm -o - %s \
|
||||
// RUN: -target-feature +dx10-clamp-and-ieee-mode \
|
||||
// RUN: -mno-amdgpu-ieee -menable-no-nans \
|
||||
// RUN: | FileCheck -check-prefixes=COMMON,OFF %s
|
||||
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -O0 -emit-llvm -o - %s \
|
||||
// RUN: -target-feature +dx10-clamp-and-ieee-mode \
|
||||
// RUN: -mno-amdgpu-ieee -cl-fast-relaxed-math \
|
||||
// RUN: | FileCheck -check-prefixes=COMMON,OFF %s
|
||||
|
||||
// Check AMDGCN ISA generation.
|
||||
|
||||
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -O3 -S -o - %s \
|
||||
// RUN: -target-feature +dx10-clamp-and-ieee-mode \
|
||||
// RUN: | FileCheck -check-prefixes=ISA-ON %s
|
||||
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -O3 -S -o - %s \
|
||||
// RUN: -target-feature +dx10-clamp-and-ieee-mode \
|
||||
// RUN: -mno-amdgpu-ieee -menable-no-nans \
|
||||
// RUN: | FileCheck -check-prefixes=ISA-OFF %s
|
||||
|
||||
|
||||
@ -1998,11 +1998,11 @@ The AMDGPU backend supports the following LLVM IR attributes.
|
||||
"amdgpu-flat-work-group-size" value, the implied occupancy
|
||||
bounds by the workgroup size takes precedence.
|
||||
|
||||
"amdgpu-ieee" true/false. GFX6-GFX11 Only
|
||||
"amdgpu-ieee" true/false. GFX6-GFX11 (except GFX1170) Only
|
||||
Specify whether the function expects the IEEE field of the
|
||||
mode register to be set on entry. Overrides the default for
|
||||
the calling convention.
|
||||
"amdgpu-dx10-clamp" true/false. GFX6-GFX11 Only
|
||||
"amdgpu-dx10-clamp" true/false. GFX6-GFX11 (except GFX1170) Only
|
||||
Specify whether the function expects the DX10_CLAMP field of
|
||||
the mode register to be set on entry. Overrides the default
|
||||
for the calling convention.
|
||||
@ -5787,7 +5787,7 @@ The fields used by CP for code objects before V3 also match those specified in
|
||||
CP is responsible for
|
||||
filling in
|
||||
``COMPUTE_PGM_RSRC1.PRIV``.
|
||||
21 1 bit ENABLE_DX10_CLAMP GFX9-GFX11
|
||||
21 1 bit ENABLE_DX10_CLAMP GFX9-GFX11 (except GFX1170)
|
||||
Wavefront starts execution
|
||||
with DX10 clamp mode
|
||||
enabled. Used by the vector
|
||||
@ -5799,6 +5799,8 @@ The fields used by CP for code objects before V3 also match those specified in
|
||||
|
||||
Used by CP to set up
|
||||
``COMPUTE_PGM_RSRC1.DX10_CLAMP``.
|
||||
GFX1170
|
||||
Reserved. Must be 0.
|
||||
WG_RR_EN GFX12
|
||||
If 1, wavefronts are scheduled
|
||||
in a round-robin fashion with
|
||||
@ -21529,9 +21531,11 @@ terminated by an ``.end_amdhsa_kernel`` directive.
|
||||
Possible values are defined in
|
||||
:ref:`amdgpu-amdhsa-floating-point-denorm-mode-enumeration-values-table`.
|
||||
``.amdhsa_dx10_clamp`` 1 GFX6-GFX11 Controls ENABLE_DX10_CLAMP in
|
||||
:ref:`amdgpu-amdhsa-compute_pgm_rsrc1-gfx6-gfx12-table`.
|
||||
(except :ref:`amdgpu-amdhsa-compute_pgm_rsrc1-gfx6-gfx12-table`.
|
||||
GFX1170)
|
||||
``.amdhsa_ieee_mode`` 1 GFX6-GFX11 Controls ENABLE_IEEE_MODE in
|
||||
:ref:`amdgpu-amdhsa-compute_pgm_rsrc1-gfx6-gfx12-table`.
|
||||
(except :ref:`amdgpu-amdhsa-compute_pgm_rsrc1-gfx6-gfx12-table`.
|
||||
GFX1170)
|
||||
``.amdhsa_round_robin_scheduling`` 0 GFX12 Controls ENABLE_WG_RR_EN in
|
||||
:ref:`amdgpu-amdhsa-compute_pgm_rsrc1-gfx6-gfx12-table`.
|
||||
``.amdhsa_fp16_overflow`` 0 GFX9-GFX12 Controls FP16_OVFL in
|
||||
|
||||
@ -160,6 +160,10 @@ defm RelaxedBufferOOBMode : AMDGPUSubtargetFeature<"relaxed-buffer-oob-mode",
|
||||
"cause an adjacent access to be treated as if it were also OOB"
|
||||
>;
|
||||
|
||||
defm DX10ClampAndIEEEMode : AMDGPUSubtargetFeature<"dx10-clamp-and-ieee-mode",
|
||||
"Target has DX10_CLAMP and IEEE_MODE kernel descriptor bits"
|
||||
>;
|
||||
|
||||
defm ApertureRegs : AMDGPUSubtargetFeature<"aperture-regs",
|
||||
"Has Memory Aperture Base and Size Registers",
|
||||
/*GenPredicate=*/0
|
||||
@ -1328,7 +1332,7 @@ def FeatureSouthernIslands : GCNSubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
|
||||
FeatureGDS, FeatureGWS, FeatureDefaultComponentZero,
|
||||
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts,
|
||||
FeatureVmemWriteVgprInOrder, FeatureCubeInsts, FeatureLerpInst,
|
||||
FeatureSadInsts, FeatureCvtPkNormVOP2Insts
|
||||
FeatureSadInsts, FeatureCvtPkNormVOP2Insts, FeatureDX10ClampAndIEEEMode
|
||||
]
|
||||
>;
|
||||
|
||||
@ -1343,7 +1347,8 @@ def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS",
|
||||
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts,
|
||||
FeatureAtomicFMinFMaxF32FlatInsts, FeatureAtomicFMinFMaxF64FlatInsts,
|
||||
FeatureVmemWriteVgprInOrder, FeatureCubeInsts, FeatureLerpInst,
|
||||
FeatureSadInsts, FeatureQsadInsts, FeatureCvtPkNormVOP2Insts
|
||||
FeatureSadInsts, FeatureQsadInsts, FeatureCvtPkNormVOP2Insts,
|
||||
FeatureDX10ClampAndIEEEMode
|
||||
]
|
||||
>;
|
||||
|
||||
@ -1362,7 +1367,7 @@ def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
|
||||
FeatureUnalignedBufferAccess, FeatureImageInsts, FeatureGDS, FeatureGWS,
|
||||
FeatureDefaultComponentZero, FeatureVmemWriteVgprInOrder, FeatureCubeInsts,
|
||||
FeatureLerpInst, FeatureSadInsts, FeatureQsadInsts,
|
||||
FeatureCvtPkNormVOP2Insts
|
||||
FeatureCvtPkNormVOP2Insts, FeatureDX10ClampAndIEEEMode
|
||||
]
|
||||
>;
|
||||
|
||||
@ -1384,7 +1389,7 @@ def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9",
|
||||
FeatureDefaultComponentZero,FeatureVmemWriteVgprInOrder, FeatureVMemToLDSLoad,
|
||||
FeatureCubeInsts, FeatureLerpInst, FeatureSadInsts, FeatureQsadInsts,
|
||||
FeatureCvtNormInsts, FeatureCvtPkNormVOP2Insts,
|
||||
FeatureCvtPkNormVOP3Insts
|
||||
FeatureCvtPkNormVOP3Insts, FeatureDX10ClampAndIEEEMode
|
||||
]
|
||||
>;
|
||||
|
||||
@ -1411,7 +1416,7 @@ def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10",
|
||||
FeatureVmemWriteVgprInOrder, FeatureVMemToLDSLoad, FeatureCubeInsts,
|
||||
FeatureLerpInst, FeatureSadInsts, FeatureQsadInsts,
|
||||
FeatureCvtNormInsts, FeatureCvtPkNormVOP2Insts,
|
||||
FeatureCvtPkNormVOP3Insts
|
||||
FeatureCvtPkNormVOP3Insts, FeatureDX10ClampAndIEEEMode
|
||||
]
|
||||
>;
|
||||
|
||||
@ -1849,7 +1854,8 @@ def FeatureISAVersion11_Generic: FeatureSet<
|
||||
FeatureRequiresCOV6,
|
||||
FeatureRequiredExportPriority,
|
||||
FeatureDot5Insts,
|
||||
FeatureWMMA256bInsts])>;
|
||||
FeatureWMMA256bInsts,
|
||||
FeatureDX10ClampAndIEEEMode])>;
|
||||
|
||||
def FeatureISAVersion11_0_Common : FeatureSet<
|
||||
!listconcat(FeatureISAVersion11_Common.Features,
|
||||
@ -1858,7 +1864,8 @@ def FeatureISAVersion11_0_Common : FeatureSet<
|
||||
FeatureMADIntraFwdBug,
|
||||
FeaturePrivEnabledTrap2NopBug,
|
||||
FeatureDot5Insts,
|
||||
FeatureWMMA256bInsts])>;
|
||||
FeatureWMMA256bInsts,
|
||||
FeatureDX10ClampAndIEEEMode])>;
|
||||
|
||||
def FeatureISAVersion11_0_0 : FeatureSet<
|
||||
!listconcat(FeatureISAVersion11_0_Common.Features,
|
||||
@ -1883,7 +1890,8 @@ def FeatureISAVersion11_5_Common : FeatureSet<
|
||||
FeatureDPPSrc1SGPR,
|
||||
FeatureRequiredExportPriority,
|
||||
FeatureDot5Insts,
|
||||
FeatureWMMA256bInsts])>;
|
||||
FeatureWMMA256bInsts,
|
||||
FeatureDX10ClampAndIEEEMode])>;
|
||||
|
||||
def FeatureISAVersion11_5_0 : FeatureSet<
|
||||
!listconcat(FeatureISAVersion11_5_Common.Features,
|
||||
|
||||
@ -1448,7 +1448,7 @@ static void EmitPALMetadataCommon(AMDGPUPALMetadata *MD,
|
||||
const SIProgramInfo &CurrentProgramInfo,
|
||||
CallingConv::ID CC, const GCNSubtarget &ST,
|
||||
unsigned DynamicVGPRBlockSize) {
|
||||
if (ST.hasIEEEMode())
|
||||
if (ST.hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
|
||||
MD->setHwStage(CC, ".ieee_mode", (bool)CurrentProgramInfo.IEEEMode);
|
||||
|
||||
MD->setHwStage(CC, ".wgp_mode", (bool)CurrentProgramInfo.WgpMode);
|
||||
|
||||
@ -2130,10 +2130,10 @@ bool GCNTargetMachine::parseMachineFunctionInfo(
|
||||
MFI->NumUserSGPRs += YamlMFI.NumKernargPreloadSGPRs;
|
||||
}
|
||||
|
||||
if (ST.hasIEEEMode())
|
||||
if (ST.hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode)) {
|
||||
MFI->Mode.IEEE = YamlMFI.Mode.IEEE;
|
||||
if (ST.hasDX10ClampMode())
|
||||
MFI->Mode.DX10Clamp = YamlMFI.Mode.DX10Clamp;
|
||||
}
|
||||
|
||||
// FIXME: Move proper support for denormal-fp-math into base MachineFunction
|
||||
MFI->Mode.FP32Denormals.Input = YamlMFI.Mode.FP32InputDenormals
|
||||
|
||||
@ -1651,8 +1651,8 @@ void GCNTTIImpl::collectKernelLaunchBounds(
|
||||
|
||||
GCNTTIImpl::KnownIEEEMode
|
||||
GCNTTIImpl::fpenvIEEEMode(const Instruction &I) const {
|
||||
if (!ST->hasIEEEMode()) // Only mode on gfx12
|
||||
return KnownIEEEMode::On;
|
||||
if (!ST->hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
|
||||
return KnownIEEEMode::On; // Only mode on gfx1170+
|
||||
|
||||
const Function *F = I.getFunction();
|
||||
if (!F)
|
||||
|
||||
@ -6243,14 +6243,16 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
|
||||
COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
|
||||
ValRange);
|
||||
} else if (ID == ".amdhsa_dx10_clamp") {
|
||||
if (IVersion.Major >= 12)
|
||||
return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
|
||||
if (!getSTI().hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
|
||||
return Error(IDRange.Start, "directive unsupported on gfx1170+",
|
||||
IDRange);
|
||||
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
|
||||
COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
|
||||
ValRange);
|
||||
} else if (ID == ".amdhsa_ieee_mode") {
|
||||
if (IVersion.Major >= 12)
|
||||
return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
|
||||
if (!getSTI().hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
|
||||
return Error(IDRange.Start, "directive unsupported on gfx1170+",
|
||||
IDRange);
|
||||
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
|
||||
COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
|
||||
ValRange);
|
||||
|
||||
@ -2419,13 +2419,13 @@ Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
|
||||
|
||||
CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIV);
|
||||
|
||||
if (!isGFX12Plus())
|
||||
if (STI.hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
|
||||
PRINT_DIRECTIVE(".amdhsa_dx10_clamp",
|
||||
COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);
|
||||
|
||||
CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_DEBUG_MODE);
|
||||
|
||||
if (!isGFX12Plus())
|
||||
if (STI.hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
|
||||
PRINT_DIRECTIVE(".amdhsa_ieee_mode",
|
||||
COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);
|
||||
|
||||
|
||||
@ -683,12 +683,6 @@ public:
|
||||
// \returns true if the target has split barriers feature
|
||||
bool hasSplitBarriers() const { return getGeneration() >= GFX12; }
|
||||
|
||||
// \returns true if the target has DX10_CLAMP kernel descriptor mode bit
|
||||
bool hasDX10ClampMode() const { return getGeneration() < GFX12; }
|
||||
|
||||
// \returns true if the target has IEEE kernel descriptor mode bit
|
||||
bool hasIEEEMode() const { return getGeneration() < GFX12; }
|
||||
|
||||
// \returns true if the target has WG_RR_MODE kernel descriptor mode bit
|
||||
bool hasRrWGMode() const { return getGeneration() >= GFX12; }
|
||||
|
||||
|
||||
@ -40,7 +40,7 @@ MCKernelDescriptor::getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI,
|
||||
MCConstantExpr::create(amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE, Ctx),
|
||||
amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64_SHIFT,
|
||||
amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Ctx);
|
||||
if (Version.Major < 12) {
|
||||
if (STI->hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode)) {
|
||||
MCKernelDescriptor::bits_set(
|
||||
KD.compute_pgm_rsrc1, OneMCExpr,
|
||||
amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP_SHIFT,
|
||||
|
||||
@ -13,6 +13,7 @@
|
||||
#include "AMDGPUTargetStreamer.h"
|
||||
#include "AMDGPUMCExpr.h"
|
||||
#include "AMDGPUMCKernelDescriptor.h"
|
||||
#include "AMDGPUMCTargetDesc.h"
|
||||
#include "AMDGPUPTNote.h"
|
||||
#include "Utils/AMDGPUBaseInfo.h"
|
||||
#include "Utils/AMDKernelCodeTUtils.h"
|
||||
@ -561,7 +562,7 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
|
||||
amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64_SHIFT,
|
||||
amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
|
||||
".amdhsa_float_denorm_mode_16_64");
|
||||
if (IVersion.Major < 12) {
|
||||
if (STI.hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode)) {
|
||||
PrintField(KD.compute_pgm_rsrc1,
|
||||
amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP_SHIFT,
|
||||
amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP,
|
||||
|
||||
@ -15,13 +15,11 @@ SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F,
|
||||
const GCNSubtarget &ST) {
|
||||
*this = getDefaultForCallingConv(F.getCallingConv());
|
||||
|
||||
if (ST.hasIEEEMode()) {
|
||||
if (ST.hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode)) {
|
||||
StringRef IEEEAttr = F.getFnAttribute("amdgpu-ieee").getValueAsString();
|
||||
if (!IEEEAttr.empty())
|
||||
IEEE = IEEEAttr == "true";
|
||||
}
|
||||
|
||||
if (ST.hasDX10ClampMode()) {
|
||||
StringRef DX10ClampAttr =
|
||||
F.getFnAttribute("amdgpu-dx10-clamp").getValueAsString();
|
||||
if (!DX10ClampAttr.empty())
|
||||
|
||||
@ -89,11 +89,10 @@ static uint64_t getComputePGMRSrc1Reg(const SIProgramInfo &ProgInfo,
|
||||
S_00B848_MEM_ORDERED(ProgInfo.MemOrdered) |
|
||||
S_00B848_FWD_PROGRESS(ProgInfo.FwdProgress);
|
||||
|
||||
if (ST.hasDX10ClampMode())
|
||||
if (ST.hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode)) {
|
||||
Reg |= S_00B848_DX10_CLAMP(ProgInfo.DX10Clamp);
|
||||
|
||||
if (ST.hasIEEEMode())
|
||||
Reg |= S_00B848_IEEE_MODE(ProgInfo.IEEEMode);
|
||||
}
|
||||
|
||||
if (ST.hasRrWGMode())
|
||||
Reg |= S_00B848_RR_WG_MODE(ProgInfo.RrWgMode);
|
||||
@ -108,11 +107,10 @@ static uint64_t getPGMRSrc1Reg(const SIProgramInfo &ProgInfo,
|
||||
S_00B848_PRIV(ProgInfo.Priv) |
|
||||
S_00B848_DEBUG_MODE(ProgInfo.DebugMode);
|
||||
|
||||
if (ST.hasDX10ClampMode())
|
||||
if (ST.hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode)) {
|
||||
Reg |= S_00B848_DX10_CLAMP(ProgInfo.DX10Clamp);
|
||||
|
||||
if (ST.hasIEEEMode())
|
||||
Reg |= S_00B848_IEEE_MODE(ProgInfo.IEEEMode);
|
||||
}
|
||||
|
||||
if (ST.hasRrWGMode())
|
||||
Reg |= S_00B848_RR_WG_MODE(ProgInfo.RrWgMode);
|
||||
|
||||
@ -12,6 +12,7 @@
|
||||
|
||||
#include "AMDKernelCodeTUtils.h"
|
||||
#include "AMDKernelCodeT.h"
|
||||
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
|
||||
#include "SIDefines.h"
|
||||
#include "Utils/AMDGPUBaseInfo.h"
|
||||
#include "Utils/SIDefinesUtils.h"
|
||||
@ -21,6 +22,7 @@
|
||||
#include "llvm/MC/MCParser/AsmLexer.h"
|
||||
#include "llvm/MC/MCParser/MCAsmParser.h"
|
||||
#include "llvm/MC/MCStreamer.h"
|
||||
#include "llvm/MC/MCSubtargetInfo.h"
|
||||
#include "llvm/Support/MathExtras.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
|
||||
@ -383,13 +385,15 @@ void AMDGPUMCKernelCodeT::validate(const MCSubtargetInfo *STI, MCContext &Ctx) {
|
||||
if (!compute_pgm_resource1_registers->evaluateAsAbsolute(Value))
|
||||
return;
|
||||
|
||||
if (G_00B848_DX10_CLAMP(Value) && AMDGPU::isGFX12Plus(*STI)) {
|
||||
Ctx.reportError({}, "enable_dx10_clamp=1 is not allowed on GFX12+");
|
||||
if (G_00B848_DX10_CLAMP(Value) &&
|
||||
!STI->hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode)) {
|
||||
Ctx.reportError({}, "enable_dx10_clamp=1 is not allowed on GFX1170+");
|
||||
return;
|
||||
}
|
||||
|
||||
if (G_00B848_IEEE_MODE(Value) && AMDGPU::isGFX12Plus(*STI)) {
|
||||
Ctx.reportError({}, "enable_ieee_mode=1 is not allowed on GFX12+");
|
||||
if (G_00B848_IEEE_MODE(Value) &&
|
||||
!STI->hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode)) {
|
||||
Ctx.reportError({}, "enable_ieee_mode=1 is not allowed on GFX1170+");
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
; RUN: llc -mtriple=amdgcn--amdpal -mattr=-xnack < %s | FileCheck -check-prefixes=GCN,SDAG,GFX8 -enable-var-scope %s
|
||||
; RUN: llc -mtriple=amdgcn--amdpal -mattr=-xnack -mattr=+dx10-clamp-and-ieee-mode < %s | FileCheck -check-prefixes=GCN,SDAG,GFX8 -enable-var-scope %s
|
||||
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -mattr=-xnack < %s | FileCheck -check-prefixes=GCN,SDAG,GFX9 -enable-var-scope %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn--amdpal -mattr=-xnack -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GISEL,GFX9 -enable-var-scope %s
|
||||
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
; RUN: llc -mtriple=amdgcn--amdpal < %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -mtriple=amdgcn--amdpal -mattr=+dx10-clamp-and-ieee-mode < %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga < %s | FileCheck -check-prefix=VI %s
|
||||
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 -enable-var-scope %s
|
||||
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
; RUN: llc -mtriple=amdgcn--amdpal < %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -mtriple=amdgcn--amdpal -mattr=+dx10-clamp-and-ieee-mode < %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga < %s | FileCheck -check-prefix=VI %s
|
||||
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 -enable-var-scope %s
|
||||
|
||||
|
||||
100
llvm/test/CodeGen/AMDGPU/amdpal-msgpack-dx10-clamp-on.ll
Normal file
100
llvm/test/CodeGen/AMDGPU/amdpal-msgpack-dx10-clamp-on.ll
Normal file
@ -0,0 +1,100 @@
|
||||
; RUN: llc -mtriple=amdgcn--amdpal -mattr=+dx10-clamp-and-ieee-mode < %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga < %s | FileCheck -check-prefix=VI %s
|
||||
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 -enable-var-scope %s
|
||||
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1170 < %s | FileCheck -check-prefix=GFX1170 -enable-var-scope %s
|
||||
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 -enable-var-scope %s
|
||||
|
||||
; amdpal compute shader: check for 0x2e12 (COMPUTE_PGM_RSRC1) in pal metadata
|
||||
; SI-DAG: '0x2e12 (COMPUTE_PGM_RSRC1)': 0x2f0000{{$}}
|
||||
; VI-DAG: '0x2e12 (COMPUTE_PGM_RSRC1)': 0x2f02c0{{$}}
|
||||
; GFX9-DAG: '0x2e12 (COMPUTE_PGM_RSRC1)': 0x2f0000{{$}}
|
||||
; GFX1170-DAG: '0x2e12 (COMPUTE_PGM_RSRC1)': 0xe00f0000{{$}}
|
||||
; GFX12-DAG: '0x2e12 (COMPUTE_PGM_RSRC1)': 0xe00f0000{{$}}
|
||||
define amdgpu_cs half @cs_amdpal(half %arg0) #0 {
|
||||
%add = fadd half %arg0, 1.0
|
||||
ret half %add
|
||||
}
|
||||
|
||||
; amdpal evaluation shader: check for 0x2cca (SPI_SHADER_PGM_RSRC1_ES) in pal metadata
|
||||
; SI-DAG: '0x2cca (SPI_SHADER_PGM_RSRC1_ES)': 0x2f0000{{$}}
|
||||
; VI-DAG: '0x2cca (SPI_SHADER_PGM_RSRC1_ES)': 0x2f02c0{{$}}
|
||||
; GFX9-DAG: '0x2cca (SPI_SHADER_PGM_RSRC1_ES)': 0x2f0000{{$}}
|
||||
; GFX1170-DAG: '0x2cca (SPI_SHADER_PGM_RSRC1_ES)': 0xf0000{{$}}
|
||||
; GFX12-DAG: '0x2cca (SPI_SHADER_PGM_RSRC1_ES)': 0xf0000{{$}}
|
||||
define amdgpu_es half @es_amdpal(half %arg0) #0 {
|
||||
%add = fadd half %arg0, 1.0
|
||||
ret half %add
|
||||
}
|
||||
|
||||
; amdpal geometry shader: check for 0x2c8a (SPI_SHADER_PGM_RSRC1_GS) in pal metadata
|
||||
; SI-DAG: '0x2c8a (SPI_SHADER_PGM_RSRC1_GS)': 0x2f0000{{$}}
|
||||
; VI-DAG: '0x2c8a (SPI_SHADER_PGM_RSRC1_GS)': 0x2f02c0{{$}}
|
||||
; GFX9-DAG: '0x2c8a (SPI_SHADER_PGM_RSRC1_GS)': 0x2f0000{{$}}
|
||||
; GFX1170-DAG: '0x2c8a (SPI_SHADER_PGM_RSRC1_GS)': 0xa0f0000{{$}}
|
||||
; GFX12-DAG: '0x2c8a (SPI_SHADER_PGM_RSRC1_GS)': 0xa0f0000{{$}}
|
||||
define amdgpu_gs half @gs_amdpal(half %arg0) #0 {
|
||||
%add = fadd half %arg0, 1.0
|
||||
ret half %add
|
||||
}
|
||||
|
||||
; amdpal hull shader: check for 0x2d0a (SPI_SHADER_PGM_RSRC1_HS) in pal metadata
|
||||
; SI-DAG: '0x2d0a (SPI_SHADER_PGM_RSRC1_HS)': 0x2f0000{{$}}
|
||||
; VI-DAG: '0x2d0a (SPI_SHADER_PGM_RSRC1_HS)': 0x2f02c0{{$}}
|
||||
; GFX9-DAG: '0x2d0a (SPI_SHADER_PGM_RSRC1_HS)': 0x2f0000{{$}}
|
||||
; GFX1170-DAG: '0x2d0a (SPI_SHADER_PGM_RSRC1_HS)': 0x50f0000{{$}}
|
||||
; GFX12-DAG: '0x2d0a (SPI_SHADER_PGM_RSRC1_HS)': 0x50f0000{{$}}
|
||||
define amdgpu_hs half @hs_amdpal(half %arg0) #0 {
|
||||
%add = fadd half %arg0, 1.0
|
||||
ret half %add
|
||||
}
|
||||
|
||||
; amdpal load shader: check for 0x2d4a (SPI_SHADER_PGM_RSRC1_LS) in pal metadata
|
||||
; SI-DAG: '0x2d4a (SPI_SHADER_PGM_RSRC1_LS)': 0x2f0000{{$}}
|
||||
; VI-DAG: '0x2d4a (SPI_SHADER_PGM_RSRC1_LS)': 0x2f02c0{{$}}
|
||||
; GFX9-DAG: '0x2d4a (SPI_SHADER_PGM_RSRC1_LS)': 0x2f0000{{$}}
|
||||
; GFX1170-DAG: '0x2d4a (SPI_SHADER_PGM_RSRC1_LS)': 0xf0000{{$}}
|
||||
; GFX12-DAG: '0x2d4a (SPI_SHADER_PGM_RSRC1_LS)': 0xf0000{{$}}
|
||||
define amdgpu_ls half @ls_amdpal(half %arg0) #0 {
|
||||
%add = fadd half %arg0, 1.0
|
||||
ret half %add
|
||||
}
|
||||
|
||||
; amdpal pixel shader: check for 0x2c0a (SPI_SHADER_PGM_RSRC1_PS) in pal metadata
|
||||
; SI-DAG: '0x2c0a (SPI_SHADER_PGM_RSRC1_PS)': 0x2f0000{{$}}
|
||||
; VI-DAG: '0x2c0a (SPI_SHADER_PGM_RSRC1_PS)': 0x2f02c0{{$}}
|
||||
; GFX9-DAG: '0x2c0a (SPI_SHADER_PGM_RSRC1_PS)': 0x2f0000{{$}}
|
||||
; GFX1170-DAG: '0x2c0a (SPI_SHADER_PGM_RSRC1_PS)': 0x20f0000{{$}}
|
||||
; GFX12-DAG: '0x2c0a (SPI_SHADER_PGM_RSRC1_PS)': 0x20f0000{{$}}
|
||||
define amdgpu_ps half @ps_amdpal(half %arg0) #0 {
|
||||
%add = fadd half %arg0, 1.0
|
||||
ret half %add
|
||||
}
|
||||
|
||||
; amdpal vertex shader: check for 0x2c4a (SPI_SHADER_PGM_RSRC1_VS) in pal metadata
|
||||
; SI-DAG: '0x2c4a (SPI_SHADER_PGM_RSRC1_VS)': 0x2f0000{{$}}
|
||||
; VI-DAG: '0x2c4a (SPI_SHADER_PGM_RSRC1_VS)': 0x2f02c0{{$}}
|
||||
; GFX9-DAG: '0x2c4a (SPI_SHADER_PGM_RSRC1_VS)': 0x2f0000{{$}}
|
||||
; GFX1170-DAG: '0x2c4a (SPI_SHADER_PGM_RSRC1_VS)': 0x80f0000{{$}}
|
||||
; GFX12-DAG: '0x2c4a (SPI_SHADER_PGM_RSRC1_VS)': 0x80f0000{{$}}
|
||||
define amdgpu_vs half @vs_amdpal(half %arg0) #0 {
|
||||
%add = fadd half %arg0, 1.0
|
||||
ret half %add
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-dx10-clamp"="true" }
|
||||
|
||||
; amdgpu.pal.metadata.msgpack represents this:
|
||||
;
|
||||
; .amdgpu_pal_metadata
|
||||
; ---
|
||||
; amdpal.pipelines:
|
||||
; - .internal_pipeline_hash:
|
||||
; - 0x123456789abcdef0
|
||||
; - 0xfedcba9876543210
|
||||
; .registers:
|
||||
; '0x2c0b (SPI_SHADER_PGM_RSRC2_PS)': 0x42000000
|
||||
; ...
|
||||
; .end_amdgpu_pal_metadata
|
||||
|
||||
!amdgpu.pal.metadata.msgpack = !{!0}
|
||||
!0 = !{!"\81\b0\61\6d\64\70\61\6c\2e\70\69\70\65\6c\69\6e\65\73\91\82\b7\2e\69\6e\74\65\72\6e\61\6c\5f\70\69\70\65\6c\69\6e\65\5f\68\61\73\68\92\cf\12\34\56\78\9a\bc\de\f0\cf\fe\dc\ba\98\76\54\32\10\aa\2e\72\65\67\69\73\74\65\72\73\81\cd\2c\0b\ce\42\00\00\00"};
|
||||
@ -1,11 +1,15 @@
|
||||
; RUN: llc -mtriple=amdgcn--amdpal < %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -mtriple=amdgcn--amdpal -mattr=+dx10-clamp-and-ieee-mode < %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga < %s | FileCheck -check-prefix=VI %s
|
||||
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 -enable-var-scope %s
|
||||
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1170 < %s | FileCheck -check-prefix=GFX1170 -enable-var-scope %s
|
||||
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 -enable-var-scope %s
|
||||
|
||||
; amdpal compute shader: check for 0x2e12 (COMPUTE_PGM_RSRC1) in pal metadata
|
||||
; SI-DAG: '0x2e12 (COMPUTE_PGM_RSRC1)': 0xf0000{{$}}
|
||||
; VI-DAG: '0x2e12 (COMPUTE_PGM_RSRC1)': 0xf02c0{{$}}
|
||||
; GFX9-DAG: '0x2e12 (COMPUTE_PGM_RSRC1)': 0xf0000{{$}}
|
||||
; GFX1170-DAG: '0x2e12 (COMPUTE_PGM_RSRC1)': 0xe00f0000{{$}}
|
||||
; GFX12-DAG: '0x2e12 (COMPUTE_PGM_RSRC1)': 0xe00f0000{{$}}
|
||||
define amdgpu_cs half @cs_amdpal(half %arg0) #0 {
|
||||
%add = fadd half %arg0, 1.0
|
||||
ret half %add
|
||||
@ -15,6 +19,8 @@ define amdgpu_cs half @cs_amdpal(half %arg0) #0 {
|
||||
; SI-DAG: '0x2cca (SPI_SHADER_PGM_RSRC1_ES)': 0xf0000{{$}}
|
||||
; VI-DAG: '0x2cca (SPI_SHADER_PGM_RSRC1_ES)': 0xf02c0{{$}}
|
||||
; GFX9-DAG: '0x2cca (SPI_SHADER_PGM_RSRC1_ES)': 0xf0000{{$}}
|
||||
; GFX1170-DAG: '0x2cca (SPI_SHADER_PGM_RSRC1_ES)': 0xf0000{{$}}
|
||||
; GFX12-DAG: '0x2cca (SPI_SHADER_PGM_RSRC1_ES)': 0xf0000{{$}}
|
||||
define amdgpu_es half @es_amdpal(half %arg0) #0 {
|
||||
%add = fadd half %arg0, 1.0
|
||||
ret half %add
|
||||
@ -24,6 +30,8 @@ define amdgpu_es half @es_amdpal(half %arg0) #0 {
|
||||
; SI-DAG: '0x2c8a (SPI_SHADER_PGM_RSRC1_GS)': 0xf0000{{$}}
|
||||
; VI-DAG: '0x2c8a (SPI_SHADER_PGM_RSRC1_GS)': 0xf02c0{{$}}
|
||||
; GFX9-DAG: '0x2c8a (SPI_SHADER_PGM_RSRC1_GS)': 0xf0000{{$}}
|
||||
; GFX1170-DAG: '0x2c8a (SPI_SHADER_PGM_RSRC1_GS)': 0xa0f0000{{$}}
|
||||
; GFX12-DAG: '0x2c8a (SPI_SHADER_PGM_RSRC1_GS)': 0xa0f0000{{$}}
|
||||
define amdgpu_gs half @gs_amdpal(half %arg0) #0 {
|
||||
%add = fadd half %arg0, 1.0
|
||||
ret half %add
|
||||
@ -33,6 +41,8 @@ define amdgpu_gs half @gs_amdpal(half %arg0) #0 {
|
||||
; SI-DAG: '0x2d0a (SPI_SHADER_PGM_RSRC1_HS)': 0xf0000{{$}}
|
||||
; VI-DAG: '0x2d0a (SPI_SHADER_PGM_RSRC1_HS)': 0xf02c0{{$}}
|
||||
; GFX9-DAG: '0x2d0a (SPI_SHADER_PGM_RSRC1_HS)': 0xf0000{{$}}
|
||||
; GFX1170-DAG: '0x2d0a (SPI_SHADER_PGM_RSRC1_HS)': 0x50f0000{{$}}
|
||||
; GFX12-DAG: '0x2d0a (SPI_SHADER_PGM_RSRC1_HS)': 0x50f0000{{$}}
|
||||
define amdgpu_hs half @hs_amdpal(half %arg0) #0 {
|
||||
%add = fadd half %arg0, 1.0
|
||||
ret half %add
|
||||
@ -42,25 +52,30 @@ define amdgpu_hs half @hs_amdpal(half %arg0) #0 {
|
||||
; SI-DAG: '0x2d4a (SPI_SHADER_PGM_RSRC1_LS)': 0xf0000{{$}}
|
||||
; VI-DAG: '0x2d4a (SPI_SHADER_PGM_RSRC1_LS)': 0xf02c0{{$}}
|
||||
; GFX9-DAG: '0x2d4a (SPI_SHADER_PGM_RSRC1_LS)': 0xf0000{{$}}
|
||||
; GFX1170-DAG: '0x2d4a (SPI_SHADER_PGM_RSRC1_LS)': 0xf0000{{$}}
|
||||
; GFX12-DAG: '0x2d4a (SPI_SHADER_PGM_RSRC1_LS)': 0xf0000{{$}}
|
||||
define amdgpu_ls half @ls_amdpal(half %arg0) #0 {
|
||||
%add = fadd half %arg0, 1.0
|
||||
ret half %add
|
||||
}
|
||||
|
||||
; amdpal pixel shader: check for 0x2c0a (SPI_SHADER_PGM_RSRC1_PS) in pal metadata
|
||||
; below.
|
||||
; SI-DAG: '0x2c0a (SPI_SHADER_PGM_RSRC1_PS)': 0xf0000{{$}}
|
||||
; VI-DAG: '0x2c0a (SPI_SHADER_PGM_RSRC1_PS)': 0xf02c0{{$}}
|
||||
; GFX9-DAG: '0x2c0a (SPI_SHADER_PGM_RSRC1_PS)': 0xf0000{{$}}
|
||||
; GFX1170-DAG: '0x2c0a (SPI_SHADER_PGM_RSRC1_PS)': 0x20f0000{{$}}
|
||||
; GFX12-DAG: '0x2c0a (SPI_SHADER_PGM_RSRC1_PS)': 0x20f0000{{$}}
|
||||
define amdgpu_ps half @ps_amdpal(half %arg0) #0 {
|
||||
%add = fadd half %arg0, 1.0
|
||||
ret half %add
|
||||
}
|
||||
|
||||
; amdpal vertex shader: check for 45352 (SPI_SHADER_PGM_RSRC1_VS) in pal metadata
|
||||
; amdpal vertex shader: check for 0x2c4a (SPI_SHADER_PGM_RSRC1_VS) in pal metadata
|
||||
; SI-DAG: '0x2c4a (SPI_SHADER_PGM_RSRC1_VS)': 0xf0000{{$}}
|
||||
; VI-DAG: '0x2c4a (SPI_SHADER_PGM_RSRC1_VS)': 0xf02c0{{$}}
|
||||
; GFX9-DAG: '0x2c4a (SPI_SHADER_PGM_RSRC1_VS)': 0xf0000{{$}}
|
||||
; GFX1170-DAG: '0x2c4a (SPI_SHADER_PGM_RSRC1_VS)': 0x80f0000{{$}}
|
||||
; GFX12-DAG: '0x2c4a (SPI_SHADER_PGM_RSRC1_VS)': 0x80f0000{{$}}
|
||||
define amdgpu_vs half @vs_amdpal(half %arg0) #0 {
|
||||
%add = fadd half %arg0, 1.0
|
||||
ret half %add
|
||||
|
||||
@ -1,12 +1,14 @@
|
||||
; RUN: llc -mtriple=amdgcn--amdpal < %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -mtriple=amdgcn--amdpal -mattr=+dx10-clamp-and-ieee-mode < %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga < %s | FileCheck -check-prefix=VI %s
|
||||
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 -enable-var-scope %s
|
||||
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1170 < %s | FileCheck -check-prefix=GFX1170 -enable-var-scope %s
|
||||
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 -enable-var-scope %s
|
||||
|
||||
; amdpal compute shader: check for 0x2e12 (COMPUTE_PGM_RSRC1) in pal metadata
|
||||
; SI-DAG: '0x2e12 (COMPUTE_PGM_RSRC1)': 0xaf0000{{$}}
|
||||
; VI-DAG: '0x2e12 (COMPUTE_PGM_RSRC1)': 0xaf02c0{{$}}
|
||||
; GFX9-DAG: '0x2e12 (COMPUTE_PGM_RSRC1)': 0xaf0000{{$}}
|
||||
; GFX1170-DAG: '0x2e12 (COMPUTE_PGM_RSRC1)': 0xe00f0000{{$}}
|
||||
; GFX12-DAG: '0x2e12 (COMPUTE_PGM_RSRC1)': 0xe00f0000{{$}}
|
||||
define amdgpu_cs half @cs_amdpal(half %arg0) #0 {
|
||||
%add = fadd half %arg0, 1.0
|
||||
@ -17,6 +19,7 @@ define amdgpu_cs half @cs_amdpal(half %arg0) #0 {
|
||||
; SI-DAG: '0x2cca (SPI_SHADER_PGM_RSRC1_ES)': 0xaf0000{{$}}
|
||||
; VI-DAG: '0x2cca (SPI_SHADER_PGM_RSRC1_ES)': 0xaf02c0{{$}}
|
||||
; GFX9-DAG: '0x2cca (SPI_SHADER_PGM_RSRC1_ES)': 0xaf0000{{$}}
|
||||
; GFX1170-DAG: '0x2cca (SPI_SHADER_PGM_RSRC1_ES)': 0xf0000{{$}}
|
||||
; GFX12-DAG: '0x2cca (SPI_SHADER_PGM_RSRC1_ES)': 0xf0000{{$}}
|
||||
define amdgpu_es half @es_amdpal(half %arg0) #0 {
|
||||
%add = fadd half %arg0, 1.0
|
||||
@ -27,6 +30,7 @@ define amdgpu_es half @es_amdpal(half %arg0) #0 {
|
||||
; SI-DAG: '0x2c8a (SPI_SHADER_PGM_RSRC1_GS)': 0xaf0000{{$}}
|
||||
; VI-DAG: '0x2c8a (SPI_SHADER_PGM_RSRC1_GS)': 0xaf02c0{{$}}
|
||||
; GFX9-DAG: '0x2c8a (SPI_SHADER_PGM_RSRC1_GS)': 0xaf0000{{$}}
|
||||
; GFX1170-DAG: '0x2c8a (SPI_SHADER_PGM_RSRC1_GS)': 0xa0f0000{{$}}
|
||||
; GFX12-DAG: '0x2c8a (SPI_SHADER_PGM_RSRC1_GS)': 0xa0f0000{{$}}
|
||||
define amdgpu_gs half @gs_amdpal(half %arg0) #0 {
|
||||
%add = fadd half %arg0, 1.0
|
||||
@ -37,6 +41,7 @@ define amdgpu_gs half @gs_amdpal(half %arg0) #0 {
|
||||
; SI-DAG: '0x2d0a (SPI_SHADER_PGM_RSRC1_HS)': 0xaf0000{{$}}
|
||||
; VI-DAG: '0x2d0a (SPI_SHADER_PGM_RSRC1_HS)': 0xaf02c0{{$}}
|
||||
; GFX9-DAG: '0x2d0a (SPI_SHADER_PGM_RSRC1_HS)': 0xaf0000{{$}}
|
||||
; GFX1170-DAG: '0x2d0a (SPI_SHADER_PGM_RSRC1_HS)': 0x50f0000{{$}}
|
||||
; GFX12-DAG: '0x2d0a (SPI_SHADER_PGM_RSRC1_HS)': 0x50f0000{{$}}
|
||||
define amdgpu_hs half @hs_amdpal(half %arg0) #0 {
|
||||
%add = fadd half %arg0, 1.0
|
||||
@ -47,6 +52,7 @@ define amdgpu_hs half @hs_amdpal(half %arg0) #0 {
|
||||
; SI-DAG: '0x2d4a (SPI_SHADER_PGM_RSRC1_LS)': 0xaf0000{{$}}
|
||||
; VI-DAG: '0x2d4a (SPI_SHADER_PGM_RSRC1_LS)': 0xaf02c0{{$}}
|
||||
; GFX9-DAG: '0x2d4a (SPI_SHADER_PGM_RSRC1_LS)': 0xaf0000{{$}}
|
||||
; GFX1170-DAG: '0x2d4a (SPI_SHADER_PGM_RSRC1_LS)': 0xf0000{{$}}
|
||||
; GFX12-DAG: '0x2d4a (SPI_SHADER_PGM_RSRC1_LS)': 0xf0000{{$}}
|
||||
define amdgpu_ls half @ls_amdpal(half %arg0) #0 {
|
||||
%add = fadd half %arg0, 1.0
|
||||
@ -58,6 +64,7 @@ define amdgpu_ls half @ls_amdpal(half %arg0) #0 {
|
||||
; SI-DAG: '0x2c0a (SPI_SHADER_PGM_RSRC1_PS)': 0xaf0000{{$}}
|
||||
; VI-DAG: '0x2c0a (SPI_SHADER_PGM_RSRC1_PS)': 0xaf02c0{{$}}
|
||||
; GFX9-DAG: '0x2c0a (SPI_SHADER_PGM_RSRC1_PS)': 0xaf0000{{$}}
|
||||
; GFX1170-DAG: '0x2c0a (SPI_SHADER_PGM_RSRC1_PS)': 0x20f0000{{$}}
|
||||
; GFX12-DAG: '0x2c0a (SPI_SHADER_PGM_RSRC1_PS)': 0x20f0000{{$}}
|
||||
define amdgpu_ps half @ps_amdpal(half %arg0) #0 {
|
||||
%add = fadd half %arg0, 1.0
|
||||
@ -68,6 +75,7 @@ define amdgpu_ps half @ps_amdpal(half %arg0) #0 {
|
||||
; SI-DAG: '0x2c4a (SPI_SHADER_PGM_RSRC1_VS)': 0xaf0000{{$}}
|
||||
; VI-DAG: '0x2c4a (SPI_SHADER_PGM_RSRC1_VS)': 0xaf02c0{{$}}
|
||||
; GFX9-DAG: '0x2c4a (SPI_SHADER_PGM_RSRC1_VS)': 0xaf0000{{$}}
|
||||
; GFX1170-DAG: '0x2c4a (SPI_SHADER_PGM_RSRC1_VS)': 0x80f0000{{$}}
|
||||
; GFX12-DAG: '0x2c4a (SPI_SHADER_PGM_RSRC1_VS)': 0x80f0000{{$}}
|
||||
define amdgpu_vs half @vs_amdpal(half %arg0) #0 {
|
||||
%add = fadd half %arg0, 1.0
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
; RUN: llc -mtriple=amdgcn--amdhsa < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -mtriple=amdgcn--amdhsa -mattr=+dx10-clamp-and-ieee-mode < %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
; GCN-LABEL: {{^}}test_default_ci:
|
||||
; GCN: .amdhsa_dx10_clamp 1
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
# RUN: llc -mtriple=amdgcn -verify-machineinstrs -run-pass si-fold-operands %s -o - | FileCheck -check-prefix=GCN %s
|
||||
# RUN: llc -mtriple=amdgcn -verify-machineinstrs -run-pass si-fold-operands -mattr=+dx10-clamp-and-ieee-mode %s -o - | FileCheck -check-prefix=GCN %s
|
||||
|
||||
---
|
||||
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass=none -verify-machineinstrs %s -o - | FileCheck -check-prefixes=FULL,ALL %s
|
||||
# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass=none -simplify-mir -verify-machineinstrs %s -o - | FileCheck -check-prefixes=SIMPLE,ALL %s
|
||||
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mattr=+dx10-clamp-and-ieee-mode -run-pass=none -verify-machineinstrs %s -o - | FileCheck -check-prefixes=FULL,ALL %s
|
||||
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mattr=+dx10-clamp-and-ieee-mode -run-pass=none -simplify-mir -verify-machineinstrs %s -o - | FileCheck -check-prefixes=SIMPLE,ALL %s
|
||||
|
||||
|
||||
---
|
||||
|
||||
@ -2,6 +2,7 @@
|
||||
// RUN: not llvm-mc --amdhsa-code-object-version=4 -triple amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack %s -filetype=null 2>&1 | FileCheck %s --check-prefixes=ALL,GCN,GFX10PLUS,GFX10,AMDHSA
|
||||
// RUN: not llvm-mc --amdhsa-code-object-version=4 -triple amdgcn-amd-amdhsa -mcpu=gfx1100 %s -filetype=null 2>&1 | FileCheck %s --check-prefixes=ALL,GCN,GFX10PLUS,GFX11,AMDHSA
|
||||
// RUN: not llvm-mc --amdhsa-code-object-version=4 -triple amdgcn-amd-amdhsa -mcpu=gfx1200 %s -filetype=null 2>&1 | FileCheck %s -DMCPU=gfx1200 --check-prefixes=ALL,GCN,GFX10PLUS,GFX12,AMDHSA
|
||||
// RUN: not llvm-mc --amdhsa-code-object-version=4 -triple amdgcn-amd-amdhsa -mcpu=gfx1170 %s -filetype=null 2>&1 | FileCheck %s --check-prefixes=ALL,GCN,GFX10PLUS,GFX1170,AMDHSA
|
||||
// RUN: not llvm-mc --amdhsa-code-object-version=4 -triple amdgcn-amd- -mcpu=gfx810 -mattr=+xnack %s -filetype=null 2>&1 | FileCheck %s --check-prefixes=ALL,GCN,NONAMDHSA
|
||||
// RUN: not llvm-mc --amdhsa-code-object-version=4 -triple amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=+xnack %s -filetype=null 2>&1 | FileCheck %s -DMCPU=gfx90a --check-prefixes=ALL,GFX90A,PREGFX10,NOWGP,AMDHSA
|
||||
// RUN: not llvm-mc --amdhsa-code-object-version=4 -triple amdgcn-amd-amdhsa -mcpu=gfx1250 %s -filetype=null 2>&1 | FileCheck %s -DMCPU=gfx1250 --check-prefixes=ALL,GCN,GFX10PLUS,GFX12,NOWGP,AMDHSA
|
||||
@ -13,6 +14,7 @@
|
||||
// GFX10: error: .amdgcn_target directive's target id amdgcn-amd-amdhsa--gfx810:xnack+ does not match the specified target id amdgcn-amd-amdhsa--gfx1010:xnack+
|
||||
// GFX11: error: .amdgcn_target directive's target id amdgcn-amd-amdhsa--gfx810:xnack+ does not match the specified target id amdgcn-amd-amdhsa--gfx1100
|
||||
// GFX12: error: .amdgcn_target directive's target id amdgcn-amd-amdhsa--gfx810:xnack+ does not match the specified target id amdgcn-amd-amdhsa--[[MCPU]]
|
||||
// GFX1170: error: .amdgcn_target directive's target id amdgcn-amd-amdhsa--gfx810:xnack+ does not match the specified target id amdgcn-amd-amdhsa--gfx1170
|
||||
// NONAMDHSA: error: .amdgcn_target directive's target id amdgcn-amd-amdhsa--gfx810:xnack+ does not match the specified target id amdgcn-amd-unknown--gfx810
|
||||
.warning "test_target"
|
||||
.amdgcn_target "amdgcn-amd-amdhsa--gfx810:xnack+"
|
||||
@ -290,7 +292,8 @@
|
||||
.end_amdhsa_kernel
|
||||
|
||||
// GCN-LABEL: warning: test_amdhsa_dx10_clamp_bit
|
||||
// GFX12: error: directive unsupported on gfx12+
|
||||
// GFX1170: error: directive unsupported on gfx1170+
|
||||
// GFX12: error: directive unsupported on gfx1170+
|
||||
.warning "test_amdhsa_dx10_clamp_bit"
|
||||
.amdhsa_kernel test_amdhsa_dx10_clamp_bit
|
||||
.amdhsa_next_free_vgpr 32
|
||||
@ -299,7 +302,8 @@
|
||||
.end_amdhsa_kernel
|
||||
|
||||
// GCN-LABEL: warning: test_amdhsa_ieee_mode_bit
|
||||
// GFX12: error: directive unsupported on gfx12+
|
||||
// GFX1170: error: directive unsupported on gfx1170+
|
||||
// GFX12: error: directive unsupported on gfx1170+
|
||||
.warning "test_amdhsa_ieee_mode_bit"
|
||||
.amdhsa_kernel test_amdhsa_ieee_mode_bit
|
||||
.amdhsa_next_free_vgpr 32
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes=inline < %s | FileCheck %s
|
||||
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mattr=+dx10-clamp-and-ieee-mode -S -passes=inline < %s | FileCheck %s
|
||||
|
||||
define i32 @func_default() #0 {
|
||||
ret i32 0
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes=inline < %s | FileCheck %s
|
||||
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mattr=+dx10-clamp-and-ieee-mode -S -passes=inline < %s | FileCheck %s
|
||||
|
||||
define i32 @func_default() #0 {
|
||||
ret i32 0
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user