[AMDGPU] Remove Code Object V2 (#65715)

Code Object V2 has been deprecated for more than a year now. We can
safely remove it from LLVM.

- [clang] Remove support for the `-mcode-object-version=2` option.
- [lld] Remove/refactor tests that were still using COV2
- [llvm] Update AMDGPUUsage.rst
- Code Object V2 docs are left for informational purposes because those
code objects may still be supported by the runtime/loaders for a while.
- [AMDGPU] Remove COV2 emission capabilities.
- [AMDGPU] Remove `MetadataStreamerYamlV2` which was only used by COV2
- [AMDGPU] Update all tests that were still using COV2 - They are either
deleted or ported directly to code object v4 (as v3 is also planned to
be removed soon).
This commit is contained in:
Pierre van Houtryve 2023-09-21 12:00:45 +02:00 committed by GitHub
parent 315ddc5120
commit fe2f67e4ba
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
82 changed files with 705 additions and 6645 deletions

View File

@ -82,7 +82,7 @@ public:
/// code object version times 100.
enum CodeObjectVersionKind {
COV_None,
COV_2 = 200,
COV_2 = 200, // Unsupported.
COV_3 = 300,
COV_4 = 400,
COV_5 = 500,

View File

@ -4624,9 +4624,9 @@ defm amdgpu_ieee : BoolOption<"m", "amdgpu-ieee",
def mcode_object_version_EQ : Joined<["-"], "mcode-object-version=">, Group<m_Group>,
HelpText<"Specify code object ABI version. Defaults to 4. (AMDGPU only)">,
Visibility<[ClangOption, CC1Option]>,
Values<"none,2,3,4,5">,
Values<"none,3,4,5">,
NormalizedValuesScope<"TargetOptions">,
NormalizedValues<["COV_None", "COV_2", "COV_3", "COV_4", "COV_5"]>,
NormalizedValues<["COV_None", "COV_3", "COV_4", "COV_5"]>,
MarshallingInfoEnum<TargetOpts<"CodeObjectVersion">, "COV_4">;
defm cumode : SimpleMFlag<"cumode",

View File

@ -2323,7 +2323,7 @@ getAMDGPUCodeObjectArgument(const Driver &D, const llvm::opt::ArgList &Args) {
void tools::checkAMDGPUCodeObjectVersion(const Driver &D,
const llvm::opt::ArgList &Args) {
const unsigned MinCodeObjVer = 2;
const unsigned MinCodeObjVer = 3;
const unsigned MaxCodeObjVer = 5;
if (auto *CodeObjArg = getAMDGPUCodeObjectArgument(D, Args)) {

View File

@ -3,9 +3,6 @@
// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
// RUN: -o - %s | FileCheck %s -check-prefix=V4
// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
// RUN: -mcode-object-version=2 -o - %s | FileCheck -check-prefix=V2 %s
// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
// RUN: -mcode-object-version=3 -o - %s | FileCheck -check-prefix=V3 %s
@ -21,7 +18,6 @@
// RUN: not %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
// RUN: -mcode-object-version=4.1 -o - %s 2>&1| FileCheck %s -check-prefix=INV
// V2: !{{.*}} = !{i32 1, !"amdgpu_code_object_version", i32 200}
// V3: !{{.*}} = !{i32 1, !"amdgpu_code_object_version", i32 300}
// V4: !{{.*}} = !{i32 1, !"amdgpu_code_object_version", i32 400}
// V5: !{{.*}} = !{i32 1, !"amdgpu_code_object_version", i32 500}

View File

@ -1,15 +1,5 @@
// REQUIRES: amdgpu-registered-target
// Check bundle ID for code object v2.
// RUN: not %clang -### --target=x86_64-linux-gnu \
// RUN: -mcode-object-version=2 \
// RUN: --offload-arch=gfx906 --rocm-path=%S/Inputs/rocm \
// RUN: %s 2>&1 | FileCheck -check-prefix=V2 %s
// V2: "-mllvm" "--amdhsa-code-object-version=2"
// V2: "-targets=host-x86_64-unknown-linux,hip-amdgcn-amd-amdhsa--gfx906"
// Check bundle ID for code object v3.
// RUN: not %clang -### --target=x86_64-linux-gnu \
@ -61,9 +51,16 @@
// RUN: not %clang -### --target=x86_64-linux-gnu \
// RUN: -mcode-object-version=1 \
// RUN: --offload-arch=gfx906 --rocm-path=%S/Inputs/rocm \
// RUN: %s 2>&1 | FileCheck -check-prefix=INVALID %s
// INVALID: error: invalid integral value '1' in '-mcode-object-version=1'
// INVALID-NOT: error: invalid integral value
// RUN: %s 2>&1 | FileCheck -check-prefix=INVALID_1 %s
// INVALID_1: error: invalid integral value '1' in '-mcode-object-version=1'
// INVALID_1-NOT: error: invalid integral value
// RUN: not %clang -### --target=x86_64-linux-gnu \
// RUN: -mcode-object-version=2 \
// RUN: --offload-arch=gfx906 --rocm-path=%S/Inputs/rocm \
// RUN: %s 2>&1 | FileCheck -check-prefix=INVALID_2 %s
// INVALID_2: error: invalid integral value '2' in '-mcode-object-version=2'
// INVALID_2-NOT: error: invalid integral value
// Check LLVM code object version option --amdhsa-code-object-version
// is passed to -cc1 and -cc1as, and -mcode-object-version is passed

View File

@ -1,6 +1,5 @@
.text
.globl kernel_0
.align 64
.amdgpu_hsa_kernel kernel_0
kernel_0:
s_endpgm

View File

@ -1,6 +1,5 @@
.text
.globl kernel_1
.align 64
.amdgpu_hsa_kernel kernel_1
kernel_1:
s_endpgm

View File

@ -1,6 +1,6 @@
# REQUIRES: amdgpu
# RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj %s -o %t-0.o
# RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=2 -filetype=obj %s -o %t-1.o
# RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 -filetype=obj %s -o %t-0.o
# RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 -filetype=obj %s -o %t-1.o
# RUN: not ld.lld -shared %t-0.o %t-1.o -o /dev/null 2>&1 | FileCheck %s
# CHECK: ld.lld: error: incompatible ABI version: {{.*}}-1.o

View File

@ -1,6 +1,6 @@
# REQUIRES: amdgpu
# RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx802 --amdhsa-code-object-version=2 -filetype=obj %S/Inputs/amdgpu-kernel-0.s -o %t-0.o
# RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=2 -filetype=obj %S/Inputs/amdgpu-kernel-1.s -o %t-1.o
# RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx802 --amdhsa-code-object-version=4 -filetype=obj %S/Inputs/amdgpu-kernel-0.s -o %t-0.o
# RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=4 -filetype=obj %S/Inputs/amdgpu-kernel-1.s -o %t-1.o
# RUN: not ld.lld -shared %t-0.o %t-1.o -o /dev/null 2>&1 | FileCheck %s
# CHECK: error: incompatible e_flags: {{.*}}-1.o
# CHECK: error: incompatible mach: {{.*}}-1.o

View File

@ -1,6 +1,6 @@
# REQUIRES: amdgpu
# RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=2 -filetype=obj %S/Inputs/amdgpu-kernel-0.s -o %t-0.o
# RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=2 -filetype=obj %S/Inputs/amdgpu-kernel-1.s -o %t-1.o
# RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=4 -filetype=obj %S/Inputs/amdgpu-kernel-0.s -o %t-0.o
# RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=4 -filetype=obj %S/Inputs/amdgpu-kernel-1.s -o %t-1.o
# RUN: ld.lld -shared %t-0.o %t-1.o -o %t.so
# RUN: llvm-readobj --file-headers %t.so | FileCheck --check-prefix=FIRSTLINK %s

View File

@ -1,56 +0,0 @@
# REQUIRES: amdgpu
# RUN: llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 %s -o %t.o
# RUN: ld.lld -shared %t.o -o %t
# RUN: llvm-readobj --sections --symbols -l %t | FileCheck %s
.hsa_code_object_version 1,0
.hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
.text
.globl kernel0
.align 256
.amdgpu_hsa_kernel kernel0
kernel0:
s_endpgm
.Lfunc_end0:
.size kernel0, .Lfunc_end0-kernel0
.globl kernel1
.align 256
.amdgpu_hsa_kernel kernel1
kernel1:
s_endpgm
s_endpgm
.Lfunc_end1:
.size kernel1, .Lfunc_end1-kernel1
# CHECK: Section {
# CHECK: Name: .text
# CHECK: Type: SHT_PROGBITS
# CHECK: Flags [ (0x6)
# CHECK: SHF_ALLOC (0x2)
# CHECK: SHF_EXECINSTR (0x4)
# CHECK: ]
# CHECK: }
# CHECK: ProgramHeader {
# CHECK: Type: PT_LOAD
# CHECK: Symbol {
# CHECK: Name: kernel0
# CHECK: Value:
# CHECK: Size: 4
# CHECK: Binding: Global
# CHECK: Type: AMDGPU_HSA_KERNEL
# CHECK: Section: .text
# CHECK: }
# CHECK: Symbol {
# CHECK: Name: kernel1
# CHECK: Value:
# CHECK: Size: 8
# CHECK: Binding: Global
# CHECK: Type: AMDGPU_HSA_KERNEL
# CHECK: Section: .text
# CHECK: }

View File

@ -1705,8 +1705,7 @@ Code Object V2 Note Records
~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. warning::
Code object V2 is not the default code object version emitted by
this version of LLVM.
Code object V2 generation is no longer supported by this version of LLVM.
The AMDGPU backend code object uses the following ELF note record in the
``.note`` section when compiling for code object V2.
@ -2974,8 +2973,7 @@ Code Object V2 Metadata
+++++++++++++++++++++++
.. warning::
Code object V2 is not the default code object version emitted by this version
of LLVM.
Code object V2 generation is no longer supported by this version of LLVM.
Code object V2 metadata is specified by the ``NT_AMD_HSA_METADATA`` note record
(see :ref:`amdgpu-note-records-v2`).
@ -14955,8 +14953,7 @@ Code Object V2 Predefined Symbols
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. warning::
Code object V2 is not the default code object version emitted by
this version of LLVM.
Code object V2 generation is no longer supported by this version of LLVM.
The AMDGPU assembler defines and updates some symbols automatically. These
symbols do not affect code generation.
@ -15011,8 +15008,7 @@ Code Object V2 Directives
~~~~~~~~~~~~~~~~~~~~~~~~~
.. warning::
Code object V2 is not the default code object version emitted by
this version of LLVM.
Code object V2 generation is no longer supported by this version of LLVM.
AMDGPU ABI defines auxiliary data in output code object. In assembly source,
one can specify them with assembler directives.
@ -15087,8 +15083,7 @@ Code Object V2 Example Source Code
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. warning::
Code Object V2 is not the default code object version emitted by
this version of LLVM.
Code object V2 generation is no longer supported by this version of LLVM.
Here is an example of a minimal assembly source file, defining one HSA kernel:

View File

@ -121,26 +121,13 @@ void AMDGPUAsmPrinter::initTargetStreamer(Module &M) {
TM.getTargetTriple().getOS() != Triple::AMDPAL)
return;
if (CodeObjectVersion >= AMDGPU::AMDHSA_COV3)
getTargetStreamer()->EmitDirectiveAMDGCNTarget();
getTargetStreamer()->EmitDirectiveAMDGCNTarget();
if (TM.getTargetTriple().getOS() == Triple::AMDHSA)
HSAMetadataStream->begin(M, *getTargetStreamer()->getTargetID());
if (TM.getTargetTriple().getOS() == Triple::AMDPAL)
getTargetStreamer()->getPALMetadata()->readFromIR(M);
if (CodeObjectVersion >= AMDGPU::AMDHSA_COV3)
return;
// HSA emits NT_AMD_HSA_CODE_OBJECT_VERSION for code objects v2.
if (TM.getTargetTriple().getOS() == Triple::AMDHSA)
getTargetStreamer()->EmitDirectiveHSACodeObjectVersion(2, 1);
// HSA and PAL emit NT_AMD_HSA_ISA_VERSION for code objects v2.
IsaVersion Version = getIsaVersion(getGlobalSTI()->getCPU());
getTargetStreamer()->EmitDirectiveHSACodeObjectISAV2(
Version.Major, Version.Minor, Version.Stepping, "AMD", "AMDGPU");
}
void AMDGPUAsmPrinter::emitEndOfAsmFile(Module &M) {
@ -148,8 +135,7 @@ void AMDGPUAsmPrinter::emitEndOfAsmFile(Module &M) {
if (!IsTargetStreamerInitialized)
initTargetStreamer(M);
if (TM.getTargetTriple().getOS() != Triple::AMDHSA ||
CodeObjectVersion == AMDGPU::AMDHSA_COV2)
if (TM.getTargetTriple().getOS() != Triple::AMDHSA)
getTargetStreamer()->EmitISAVersion();
// Emit HSA Metadata (NT_AMD_AMDGPU_HSA_METADATA).
@ -209,7 +195,7 @@ void AMDGPUAsmPrinter::emitFunctionBodyStart() {
if (!MFI.isEntryFunction())
return;
if ((STM.isMesaKernel(F) || CodeObjectVersion == AMDGPU::AMDHSA_COV2) &&
if (STM.isMesaKernel(F) &&
(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
F.getCallingConv() == CallingConv::SPIR_KERNEL)) {
amd_kernel_code_t KernelCode;
@ -226,8 +212,7 @@ void AMDGPUAsmPrinter::emitFunctionBodyEnd() {
if (!MFI.isEntryFunction())
return;
if (TM.getTargetTriple().getOS() != Triple::AMDHSA ||
CodeObjectVersion == AMDGPU::AMDHSA_COV2)
if (TM.getTargetTriple().getOS() != Triple::AMDHSA)
return;
auto &Streamer = getTargetStreamer()->getStreamer();
@ -261,8 +246,7 @@ void AMDGPUAsmPrinter::emitFunctionBodyEnd() {
}
void AMDGPUAsmPrinter::emitFunctionEntryLabel() {
if (TM.getTargetTriple().getOS() == Triple::AMDHSA &&
CodeObjectVersion >= AMDGPU::AMDHSA_COV3) {
if (TM.getTargetTriple().getOS() == Triple::AMDHSA) {
AsmPrinter::emitFunctionEntryLabel();
return;
}
@ -337,9 +321,6 @@ bool AMDGPUAsmPrinter::doInitialization(Module &M) {
if (TM.getTargetTriple().getOS() == Triple::AMDHSA) {
switch (CodeObjectVersion) {
case AMDGPU::AMDHSA_COV2:
HSAMetadataStream.reset(new HSAMD::MetadataStreamerYamlV2());
break;
case AMDGPU::AMDHSA_COV3:
HSAMetadataStream.reset(new HSAMD::MetadataStreamerMsgPackV3());
break;

View File

@ -48,435 +48,6 @@ static cl::opt<bool> VerifyHSAMetadata(
namespace AMDGPU {
namespace HSAMD {
//===----------------------------------------------------------------------===//
// HSAMetadataStreamerV2
//===----------------------------------------------------------------------===//
/// Writes the serialized HSA metadata string to the error stream, preceded by
/// a fixed header line.
void MetadataStreamerYamlV2::dump(StringRef HSAMetadataString) const {
  auto &OS = errs();
  OS << "AMDGPU HSA Metadata:\n";
  OS << HSAMetadataString << '\n';
}
/// Round-trips \p HSAMetadataString through fromString() and toString() and
/// reports the outcome on the error stream: "PASS" when the re-serialized text
/// equals the input, "FAIL" otherwise (including when either conversion
/// reports an error). On a mismatch both strings are printed.
void MetadataStreamerYamlV2::verify(StringRef HSAMetadataString) const {
  auto &OS = errs();
  OS << "AMDGPU HSA Metadata Parser Test: ";

  HSAMD::Metadata Parsed;
  std::string Reserialized;
  // toString() is only attempted when parsing succeeded.
  if (fromString(HSAMetadataString, Parsed) ||
      toString(Parsed, Reserialized)) {
    OS << "FAIL\n";
    return;
  }

  const bool RoundTrips = HSAMetadataString == Reserialized;
  OS << (RoundTrips ? "PASS" : "FAIL") << '\n';
  if (RoundTrips)
    return;

  OS << "Original input: " << HSAMetadataString << '\n'
     << "Produced output: " << Reserialized << '\n';
}
/// Maps an access-qualifier string to its AccessQualifier enumerator.
/// \returns Unknown for an empty string, the matching enumerator for
/// "read_only" / "write_only" / "read_write", and Default for anything else.
AccessQualifier
MetadataStreamerYamlV2::getAccessQualifier(StringRef AccQual) const {
  if (AccQual == "read_only")
    return AccessQualifier::ReadOnly;
  if (AccQual == "write_only")
    return AccessQualifier::WriteOnly;
  if (AccQual == "read_write")
    return AccessQualifier::ReadWrite;
  return AccQual.empty() ? AccessQualifier::Unknown : AccessQualifier::Default;
}
/// Translates an AMDGPU address space number into the corresponding
/// AddressSpaceQualifier.
/// \returns Unknown for any address space not listed below.
AddressSpaceQualifier
MetadataStreamerYamlV2::getAddressSpaceQualifier(unsigned AddressSpace) const {
  if (AddressSpace == AMDGPUAS::PRIVATE_ADDRESS)
    return AddressSpaceQualifier::Private;
  if (AddressSpace == AMDGPUAS::GLOBAL_ADDRESS)
    return AddressSpaceQualifier::Global;
  if (AddressSpace == AMDGPUAS::CONSTANT_ADDRESS)
    return AddressSpaceQualifier::Constant;
  if (AddressSpace == AMDGPUAS::LOCAL_ADDRESS)
    return AddressSpaceQualifier::Local;
  if (AddressSpace == AMDGPUAS::FLAT_ADDRESS)
    return AddressSpaceQualifier::Generic;
  if (AddressSpace == AMDGPUAS::REGION_ADDRESS)
    return AddressSpaceQualifier::Region;
  return AddressSpaceQualifier::Unknown;
}
/// Classifies a kernel argument.
///
/// A type qualifier containing "pipe" wins outright. Otherwise the base type
/// name selects Image / Sampler / Queue for the listed OpenCL type names, and
/// any other name falls back on the IR type: pointers in the local address
/// space are DynamicSharedPointer, other pointers are GlobalBuffer, and
/// non-pointers are ByValue.
ValueKind MetadataStreamerYamlV2::getValueKind(Type *Ty, StringRef TypeQual,
                                               StringRef BaseTypeName) const {
  if (TypeQual.contains("pipe"))
    return ValueKind::Pipe;

  // Kind used when the base type name is not one of the special names.
  ValueKind Fallback = ValueKind::ByValue;
  if (isa<PointerType>(Ty)) {
    const bool IsLocal =
        Ty->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
    Fallback =
        IsLocal ? ValueKind::DynamicSharedPointer : ValueKind::GlobalBuffer;
  }

  return StringSwitch<ValueKind>(BaseTypeName)
      .Case("image1d_t", ValueKind::Image)
      .Case("image1d_array_t", ValueKind::Image)
      .Case("image1d_buffer_t", ValueKind::Image)
      .Case("image2d_t", ValueKind::Image)
      .Case("image2d_array_t", ValueKind::Image)
      .Case("image2d_array_depth_t", ValueKind::Image)
      .Case("image2d_array_msaa_t", ValueKind::Image)
      .Case("image2d_array_msaa_depth_t", ValueKind::Image)
      .Case("image2d_depth_t", ValueKind::Image)
      .Case("image2d_msaa_t", ValueKind::Image)
      .Case("image2d_msaa_depth_t", ValueKind::Image)
      .Case("image3d_t", ValueKind::Image)
      .Case("sampler_t", ValueKind::Sampler)
      .Case("queue_t", ValueKind::Queue)
      .Default(Fallback);
}
/// Produces a C-like name for an IR type.
///
/// Integers of width 8/16/32/64 become char/short/int/long, other widths
/// become "i<N>"; unsigned integers get a 'u' prefix. Half, float and double
/// map to their own names. Fixed vectors are the element name followed by the
/// element count. Every other type yields "unknown".
std::string MetadataStreamerYamlV2::getTypeName(Type *Ty, bool Signed) const {
  switch (Ty->getTypeID()) {
  case Type::HalfTyID:
    return "half";
  case Type::FloatTyID:
    return "float";
  case Type::DoubleTyID:
    return "double";
  case Type::IntegerTyID: {
    // The unsigned spelling is the signed one with a leading 'u'.
    if (!Signed) {
      const std::string SignedName = getTypeName(Ty, true);
      return (Twine('u') + SignedName).str();
    }

    const auto Width = Ty->getIntegerBitWidth();
    if (Width == 8)
      return "char";
    if (Width == 16)
      return "short";
    if (Width == 32)
      return "int";
    if (Width == 64)
      return "long";
    return (Twine('i') + Twine(Width)).str();
  }
  case Type::FixedVectorTyID: {
    auto *Vec = cast<FixedVectorType>(Ty);
    const std::string ElementName = getTypeName(Vec->getElementType(), Signed);
    return (Twine(ElementName) + Twine(Vec->getNumElements())).str();
  }
  default:
    return "unknown";
  }
}
/// Extracts the three work-group dimensions from \p Node.
/// \returns The zero-extended integer operands in order, or an empty vector
/// when \p Node does not have exactly three operands.
std::vector<uint32_t>
MetadataStreamerYamlV2::getWorkGroupDimensions(MDNode *Node) const {
  std::vector<uint32_t> Result;
  if (Node->getNumOperands() == 3) {
    for (auto &Operand : Node->operands())
      Result.push_back(
          mdconst::extract<ConstantInt>(Operand)->getZExtValue());
  }
  return Result;
}
/// Collects the per-kernel code properties from the subtarget, the machine
/// function info and \p ProgramInfo. The function behind \p MF must use the
/// AMDGPU_KERNEL or SPIR_KERNEL calling convention (asserted).
Kernel::CodeProps::Metadata MetadataStreamerYamlV2::getHSACodeProps(
    const MachineFunction &MF, const SIProgramInfo &ProgramInfo) const {
  const Function &Fn = MF.getFunction();
  assert(Fn.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
         Fn.getCallingConv() == CallingConv::SPIR_KERNEL);

  const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
  const SIMachineFunctionInfo &FuncInfo = *MF.getInfo<SIMachineFunctionInfo>();

  HSAMD::Kernel::CodeProps::Metadata Props;

  // getKernArgSegmentSize() also reports the alignment through its second
  // argument; the recorded alignment is never below 4.
  Align KernArgAlign;
  Props.mKernargSegmentSize =
      Subtarget.getKernArgSegmentSize(Fn, KernArgAlign);
  Props.mKernargSegmentAlign = std::max(KernArgAlign, Align(4)).value();

  Props.mGroupSegmentFixedSize = ProgramInfo.LDSSize;
  Props.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize;
  Props.mWavefrontSize = Subtarget.getWavefrontSize();
  Props.mNumSGPRs = ProgramInfo.NumSGPR;
  Props.mNumVGPRs = ProgramInfo.NumVGPR;
  Props.mMaxFlatWorkGroupSize = FuncInfo.getMaxFlatWorkGroupSize();
  Props.mIsDynamicCallStack = ProgramInfo.DynamicCallStack;
  Props.mIsXNACKEnabled = Subtarget.isXNACKEnabled();
  Props.mNumSpilledSGPRs = FuncInfo.getNumSpilledSGPRs();
  Props.mNumSpilledVGPRs = FuncInfo.getNumSpilledVGPRs();

  return Props;
}
/// Returns a value-initialized debug-properties record; neither argument is
/// consulted.
Kernel::DebugProps::Metadata MetadataStreamerYamlV2::getHSADebugProps(
    const MachineFunction &MF, const SIProgramInfo &ProgramInfo) const {
  using DebugPropsMD = HSAMD::Kernel::DebugProps::Metadata;
  return DebugPropsMD();
}
/// Appends the V2 metadata version (major, then minor) to the version list.
void MetadataStreamerYamlV2::emitVersion() {
  HSAMetadata.mVersion.push_back(VersionMajorV2);
  HSAMetadata.mVersion.push_back(VersionMinorV2);
}
/// Copies the first operand string of every non-empty entry of the module's
/// "llvm.printf.fmts" named metadata into the printf list. Does nothing when
/// the named metadata is absent.
void MetadataStreamerYamlV2::emitPrintf(const Module &Mod) {
  auto &PrintfList = HSAMetadata.mPrintf;

  auto Formats = Mod.getNamedMetadata("llvm.printf.fmts");
  if (!Formats)
    return;

  for (auto *Entry : Formats->operands()) {
    if (!Entry->getNumOperands())
      continue;
    PrintfList.push_back(
        std::string(cast<MDString>(Entry->getOperand(0))->getString()));
  }
}
/// Records the source language of the most recently added kernel.
///
/// Only OpenCL is recognized: when the module carries "opencl.ocl.version"
/// metadata whose first entry has at least two operands, the language is set
/// to "OpenCL C" and the first two operands are stored as the version.
void MetadataStreamerYamlV2::emitKernelLanguage(const Function &Func) {
  auto &KernelMD = HSAMetadata.mKernels.back();

  // TODO: What about other languages?
  auto VersionMD = Func.getParent()->getNamedMetadata("opencl.ocl.version");
  if (!VersionMD || !VersionMD->getNumOperands())
    return;

  auto FirstEntry = VersionMD->getOperand(0);
  if (FirstEntry->getNumOperands() <= 1)
    return;

  KernelMD.mLanguage = "OpenCL C";
  for (unsigned Idx = 0; Idx != 2; ++Idx)
    KernelMD.mLanguageVersion.push_back(
        mdconst::extract<ConstantInt>(FirstEntry->getOperand(Idx))
            ->getZExtValue());
}
void MetadataStreamerYamlV2::emitKernelAttrs(const Function &Func) {
auto &Attrs = HSAMetadata.mKernels.back().mAttrs;
if (auto Node = Func.getMetadata("reqd_work_group_size"))
Attrs.mReqdWorkGroupSize = getWorkGroupDimensions(Node);
if (auto Node = Func.getMetadata("work_group_size_hint"))
Attrs.mWorkGroupSizeHint = getWorkGroupDimensions(Node);
if (auto Node = Func.getMetadata("vec_type_hint")) {
Attrs.mVecTypeHint = getTypeName(
cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue());
}
if (Func.hasFnAttribute("runtime-handle")) {
Attrs.mRuntimeHandle =
Func.getFnAttribute("runtime-handle").getValueAsString().str();
}
}
/// Emits one argument record per explicit function argument, in declaration
/// order, followed by the hidden (implicit) arguments.
void MetadataStreamerYamlV2::emitKernelArgs(const Function &Func,
                                            const GCNSubtarget &ST) {
  for (auto &ExplicitArg : Func.args()) {
    emitKernelArg(ExplicitArg);
  }

  emitHiddenKernelArgs(Func, ST);
}
void MetadataStreamerYamlV2::emitKernelArg(const Argument &Arg) {
auto Func = Arg.getParent();
auto ArgNo = Arg.getArgNo();
const MDNode *Node;
StringRef Name;
Node = Func->getMetadata("kernel_arg_name");
if (Node && ArgNo < Node->getNumOperands())
Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
else if (Arg.hasName())
Name = Arg.getName();
StringRef TypeName;
Node = Func->getMetadata("kernel_arg_type");
if (Node && ArgNo < Node->getNumOperands())
TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
StringRef BaseTypeName;
Node = Func->getMetadata("kernel_arg_base_type");
if (Node && ArgNo < Node->getNumOperands())
BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
StringRef AccQual;
if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
Arg.hasNoAliasAttr()) {
AccQual = "read_only";
} else {
Node = Func->getMetadata("kernel_arg_access_qual");
if (Node && ArgNo < Node->getNumOperands())
AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
}
StringRef TypeQual;
Node = Func->getMetadata("kernel_arg_type_qual");
if (Node && ArgNo < Node->getNumOperands())
TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
const DataLayout &DL = Func->getParent()->getDataLayout();
MaybeAlign PointeeAlign;
if (auto PtrTy = dyn_cast<PointerType>(Arg.getType())) {
if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
// FIXME: Should report this for all address spaces
PointeeAlign = Arg.getParamAlign().valueOrOne();
}
}
Type *ArgTy;
Align ArgAlign;
std::tie(ArgTy, ArgAlign) = getArgumentTypeAlign(Arg, DL);
emitKernelArg(DL, ArgTy, ArgAlign,
getValueKind(ArgTy, TypeQual, BaseTypeName), PointeeAlign, Name,
TypeName, BaseTypeName, AccQual, TypeQual);
}
/// Appends an argument record to the most recently added kernel and fills it
/// from the given properties. The space-separated words of \p TypeQual set the
/// const / restrict / volatile / pipe flags; unrecognized words are ignored.
void MetadataStreamerYamlV2::emitKernelArg(
    const DataLayout &DL, Type *Ty, Align Alignment, ValueKind ValueKind,
    MaybeAlign PointeeAlign, StringRef Name, StringRef TypeName,
    StringRef BaseTypeName, StringRef AccQual, StringRef TypeQual) {
  auto &KernelArgs = HSAMetadata.mKernels.back().mArgs;
  KernelArgs.push_back(Kernel::Arg::Metadata());
  auto &Entry = KernelArgs.back();

  Entry.mName = std::string(Name);
  Entry.mTypeName = std::string(TypeName);
  Entry.mSize = DL.getTypeAllocSize(Ty);
  Entry.mAlign = Alignment.value();
  Entry.mValueKind = ValueKind;
  // A pointee alignment of 0 stands for "not provided".
  Entry.mPointeeAlign = PointeeAlign ? PointeeAlign->value() : 0;

  if (auto PtrTy = dyn_cast<PointerType>(Ty))
    Entry.mAddrSpaceQual = getAddressSpaceQualifier(PtrTy->getAddressSpace());

  Entry.mAccQual = getAccessQualifier(AccQual);

  // TODO: Emit Arg.mActualAccQual.

  SmallVector<StringRef, 1> Qualifiers;
  TypeQual.split(Qualifiers, " ", -1, false);
  for (StringRef Qualifier : Qualifiers) {
    if (Qualifier == "const")
      Entry.mIsConst = true;
    else if (Qualifier == "restrict")
      Entry.mIsRestrict = true;
    else if (Qualifier == "volatile")
      Entry.mIsVolatile = true;
    else if (Qualifier == "pipe")
      Entry.mIsPipe = true;
  }
}
void MetadataStreamerYamlV2::emitHiddenKernelArgs(const Function &Func,
const GCNSubtarget &ST) {
unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(Func);
if (!HiddenArgNumBytes)
return;
auto &DL = Func.getParent()->getDataLayout();
auto Int64Ty = Type::getInt64Ty(Func.getContext());
if (HiddenArgNumBytes >= 8)
emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetX);
if (HiddenArgNumBytes >= 16)
emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetY);
if (HiddenArgNumBytes >= 24)
emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetZ);
auto Int8PtrTy =
PointerType::get(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
if (HiddenArgNumBytes >= 32) {
// We forbid the use of features requiring hostcall when compiling OpenCL
// before code object V5, which makes the mutual exclusion between the
// "printf buffer" and "hostcall buffer" here sound.
if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenPrintfBuffer);
else if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr"))
emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenHostcallBuffer);
else
emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
}
// Emit "default queue" and "completion action" arguments if enqueue kernel is
// used, otherwise emit dummy "none" arguments.
if (HiddenArgNumBytes >= 40) {
if (!Func.hasFnAttribute("amdgpu-no-default-queue")) {
emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenDefaultQueue);
} else {
emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
}
}
if (HiddenArgNumBytes >= 48) {
if (!Func.hasFnAttribute("amdgpu-no-completion-action")) {
emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenCompletionAction);
} else {
emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
}
}
// Emit the pointer argument for multi-grid object.
if (HiddenArgNumBytes >= 56) {
if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg"))
emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenMultiGridSyncArg);
else
emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
}
}
/// Hands the collected metadata to \p TargetStreamer and forwards its result.
bool MetadataStreamerYamlV2::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
  const auto &Collected = getHSAMetadata();
  return TargetStreamer.EmitHSAMetadata(Collected);
}
/// Starts metadata collection for \p Mod by recording the metadata version and
/// the module's printf format strings. \p TargetID is not used here.
void MetadataStreamerYamlV2::begin(const Module &Mod,
                                   const IsaInfo::AMDGPUTargetID &TargetID) {
  this->emitVersion();
  this->emitPrintf(Mod);
}
void MetadataStreamerYamlV2::end() {
std::string HSAMetadataString;
if (toString(HSAMetadata, HSAMetadataString))
return;
if (DumpHSAMetadata)
dump(HSAMetadataString);
if (VerifyHSAMetadata)
verify(HSAMetadataString);
}
void MetadataStreamerYamlV2::emitKernel(const MachineFunction &MF,
const SIProgramInfo &ProgramInfo) {
auto &Func = MF.getFunction();
if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL)
return;
auto CodeProps = getHSACodeProps(MF, ProgramInfo);
auto DebugProps = getHSADebugProps(MF, ProgramInfo);
HSAMetadata.mKernels.push_back(Kernel::Metadata());
auto &Kernel = HSAMetadata.mKernels.back();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
Kernel.mName = std::string(Func.getName());
Kernel.mSymbolName = (Twine(Func.getName()) + Twine("@kd")).str();
emitKernelLanguage(Func);
emitKernelAttrs(Func);
emitKernelArgs(Func, ST);
HSAMetadata.mKernels.back().mCodeProps = CodeProps;
HSAMetadata.mKernels.back().mDebugProps = DebugProps;
}
//===----------------------------------------------------------------------===//
// HSAMetadataStreamerV3
//===----------------------------------------------------------------------===//

View File

@ -159,82 +159,6 @@ public:
~MetadataStreamerMsgPackV5() = default;
};
/// Metadata streamer for code object V2. It collects kernel metadata into an
/// HSAMD::Metadata structure and hands that structure to the target streamer
/// in emitTo(). The msgpack-based hooks inherited from MetadataStreamer are
/// overridden with unreachable stubs.
class MetadataStreamerYamlV2 final : public MetadataStreamer {
private:
// Metadata accumulated between begin() and end().
Metadata HSAMetadata;
// Prints the serialized metadata string to the error stream.
void dump(StringRef HSAMetadataString) const;
// Round-trips the serialized metadata string through the parser and reports
// PASS/FAIL on the error stream.
void verify(StringRef HSAMetadataString) const;
// Helpers translating IR-level information into metadata enumerators/names.
AccessQualifier getAccessQualifier(StringRef AccQual) const;
AddressSpaceQualifier getAddressSpaceQualifier(unsigned AddressSpace) const;
ValueKind getValueKind(Type *Ty, StringRef TypeQual,
StringRef BaseTypeName) const;
std::string getTypeName(Type *Ty, bool Signed) const;
// Returns the node's three integer operands, or an empty vector when the node
// does not have exactly three operands.
std::vector<uint32_t> getWorkGroupDimensions(MDNode *Node) const;
Kernel::CodeProps::Metadata getHSACodeProps(
const MachineFunction &MF,
const SIProgramInfo &ProgramInfo) const;
Kernel::DebugProps::Metadata getHSADebugProps(
const MachineFunction &MF,
const SIProgramInfo &ProgramInfo) const;
// Emitters; the emitKernel* ones fill in the most recently added kernel record.
void emitPrintf(const Module &Mod);
void emitKernelLanguage(const Function &Func);
void emitKernelAttrs(const Function &Func);
void emitKernelArgs(const Function &Func, const GCNSubtarget &ST);
// Emits the record for one explicit argument.
void emitKernelArg(const Argument &Arg);
// Emits one argument record from already-resolved properties; the defaulted
// parameters are left out by the hidden-argument emitter.
void emitKernelArg(const DataLayout &DL, Type *Ty, Align Alignment,
ValueKind ValueKind,
MaybeAlign PointeeAlign = std::nullopt,
StringRef Name = "", StringRef TypeName = "",
StringRef BaseTypeName = "", StringRef AccQual = "",
StringRef TypeQual = "");
void emitHiddenKernelArgs(const Function &Func, const GCNSubtarget &ST);
const Metadata &getHSAMetadata() const {
return HSAMetadata;
}
protected:
void emitVersion() override;
// The two overrides below take msgpack nodes and must never be called on this
// streamer; they exist only to satisfy the base-class interface.
void emitHiddenKernelArgs(const MachineFunction &MF, unsigned &Offset,
msgpack::ArrayDocNode Args) override {
llvm_unreachable("Dummy override should not be invoked!");
}
void emitKernelAttrs(const Function &Func,
msgpack::MapDocNode Kern) override {
llvm_unreachable("Dummy override should not be invoked!");
}
public:
MetadataStreamerYamlV2() = default;
~MetadataStreamerYamlV2() = default;
// Passes the collected metadata to the target streamer.
bool emitTo(AMDGPUTargetStreamer &TargetStreamer) override;
// Records the metadata version and the module's printf format strings.
void begin(const Module &Mod,
const IsaInfo::AMDGPUTargetID &TargetID) override;
// Serializes the metadata for the optional dump/verify diagnostics.
void end() override;
// Adds a record for one kernel function.
void emitKernel(const MachineFunction &MF,
const SIProgramInfo &ProgramInfo) override;
};
} // end namespace HSAMD
} // end namespace AMDGPU
} // end namespace llvm

View File

@ -1344,7 +1344,7 @@ public:
// AsmParser::parseDirectiveSet() cannot be specialized for specific target.
AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
MCContext &Ctx = getContext();
if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
MCSymbol *Sym =
Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
@ -1361,7 +1361,7 @@ public:
Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
}
if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
initializeGprCountSymbol(IS_VGPR);
initializeGprCountSymbol(IS_SGPR);
} else
@ -2861,7 +2861,7 @@ AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
return nullptr;
}
if (isHsaAbiVersion3AndAbove(&getSTI())) {
if (isHsaAbi(getSTI())) {
if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
return nullptr;
} else
@ -4920,7 +4920,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
return TokError("directive only supported for amdgcn architecture");
if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
if (!isHsaAbi(getSTI()))
return TokError("directive only supported for amdhsa OS");
StringRef KernelName;
@ -5480,33 +5480,15 @@ bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
}
bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
const char *AssemblerDirectiveBegin;
const char *AssemblerDirectiveEnd;
std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
isHsaAbiVersion3AndAbove(&getSTI())
? std::pair(HSAMD::V3::AssemblerDirectiveBegin,
HSAMD::V3::AssemblerDirectiveEnd)
: std::pair(HSAMD::AssemblerDirectiveBegin,
HSAMD::AssemblerDirectiveEnd);
if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
return Error(getLoc(),
(Twine(AssemblerDirectiveBegin) + Twine(" directive is "
"not available on non-amdhsa OSes")).str());
}
assert(isHsaAbi(getSTI()));
std::string HSAMetadataString;
if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
HSAMetadataString))
if (ParseToEndDirective(HSAMD::V3::AssemblerDirectiveBegin,
HSAMD::V3::AssemblerDirectiveEnd, HSAMetadataString))
return true;
if (isHsaAbiVersion3AndAbove(&getSTI())) {
if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
return Error(getLoc(), "invalid HSA metadata");
} else {
if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
return Error(getLoc(), "invalid HSA metadata");
}
if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
return Error(getLoc(), "invalid HSA metadata");
return false;
}
@ -5649,7 +5631,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
StringRef IDVal = DirectiveID.getString();
if (isHsaAbiVersion3AndAbove(&getSTI())) {
if (isHsaAbi(getSTI())) {
if (IDVal == ".amdhsa_kernel")
return ParseDirectiveAMDHSAKernel();
@ -5672,8 +5654,12 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
if (IDVal == ".amd_amdgpu_isa")
return ParseDirectiveISAVersion();
if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
return ParseDirectiveHSAMetadata();
if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) {
return Error(getLoc(), (Twine(HSAMD::AssemblerDirectiveBegin) +
Twine(" directive is "
"not available on non-amdhsa OSes"))
.str());
}
}
if (IDVal == ".amdgcn_target")
@ -7765,7 +7751,7 @@ void AMDGPUAsmParser::onBeginOfFile() {
// TODO: Should try to check code object version from directive???
AMDGPU::getAmdhsaCodeObjectVersion());
if (isHsaAbiVersion3AndAbove(&getSTI()))
if (isHsaAbi(getSTI()))
getTargetStreamer().EmitDirectiveAMDGCNTarget();
}

View File

@ -424,8 +424,6 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
switch (CodeObjectVersion) {
default:
break;
case AMDGPU::AMDHSA_COV2:
break;
case AMDGPU::AMDHSA_COV3:
case AMDGPU::AMDHSA_COV4:
case AMDGPU::AMDHSA_COV5:
@ -545,7 +543,7 @@ void AMDGPUTargetELFStreamer::EmitNote(
unsigned NoteFlags = 0;
// TODO Apparently, this is currently needed for OpenCL as mentioned in
// https://reviews.llvm.org/D74995
if (STI.getTargetTriple().getOS() == Triple::AMDHSA)
if (isHsaAbi(STI))
NoteFlags = ELF::SHF_ALLOC;
S.pushSection();
@ -604,11 +602,10 @@ unsigned AMDGPUTargetELFStreamer::getEFlagsUnknownOS() {
}
unsigned AMDGPUTargetELFStreamer::getEFlagsAMDHSA() {
assert(STI.getTargetTriple().getOS() == Triple::AMDHSA);
assert(isHsaAbi(STI));
if (std::optional<uint8_t> HsaAbiVer = getHsaAbiVersion(&STI)) {
switch (*HsaAbiVer) {
case ELF::ELFABIVERSION_AMDGPU_HSA_V2:
case ELF::ELFABIVERSION_AMDGPU_HSA_V3:
return getEFlagsV3();
case ELF::ELFABIVERSION_AMDGPU_HSA_V4:

View File

@ -118,13 +118,16 @@ namespace llvm {
namespace AMDGPU {
/// \returns True if \p STI is AMDHSA.
bool isHsaAbi(const MCSubtargetInfo &STI) {
return STI.getTargetTriple().getOS() == Triple::AMDHSA;
}
std::optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI) {
if (STI && STI->getTargetTriple().getOS() != Triple::AMDHSA)
return std::nullopt;
switch (AmdhsaCodeObjectVersion) {
case 2:
return ELF::ELFABIVERSION_AMDGPU_HSA_V2;
case 3:
return ELF::ELFABIVERSION_AMDGPU_HSA_V3;
case 4:
@ -137,12 +140,6 @@ std::optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI) {
}
}
/// \returns True if getHsaAbiVersion(\p STI) yields a value and that value is
/// ELF::ELFABIVERSION_AMDGPU_HSA_V2; false if it yields no value or any other
/// version.
bool isHsaAbiVersion2(const MCSubtargetInfo *STI) {
  const std::optional<uint8_t> Version = getHsaAbiVersion(STI);
  return Version.has_value() && *Version == ELF::ELFABIVERSION_AMDGPU_HSA_V2;
}
bool isHsaAbiVersion3(const MCSubtargetInfo *STI) {
if (std::optional<uint8_t> HsaAbiVer = getHsaAbiVersion(STI))
return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V3;
@ -161,11 +158,6 @@ bool isHsaAbiVersion5(const MCSubtargetInfo *STI) {
return false;
}
/// \returns True as soon as one of isHsaAbiVersion3, isHsaAbiVersion4 or
/// isHsaAbiVersion5 reports true for \p STI (checked in that order), false if
/// none of them does.
bool isHsaAbiVersion3AndAbove(const MCSubtargetInfo *STI) {
  if (isHsaAbiVersion3(STI))
    return true;
  if (isHsaAbiVersion4(STI))
    return true;
  return isHsaAbiVersion5(STI);
}
/// \returns The current value of AmdhsaCodeObjectVersion, converted to
/// unsigned and otherwise unmodified.
// NOTE(review): the definition of AmdhsaCodeObjectVersion is not visible in
// this hunk; presumably it is a file-level setting -- confirm where it is set.
unsigned getAmdhsaCodeObjectVersion() {
return AmdhsaCodeObjectVersion;
}
@ -182,7 +174,6 @@ unsigned getCodeObjectVersion(const Module &M) {
unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion) {
switch (CodeObjectVersion) {
case AMDHSA_COV2:
case AMDHSA_COV3:
case AMDHSA_COV4:
return 48;
@ -197,7 +188,6 @@ unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion) {
// central TD file.
unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion) {
switch (CodeObjectVersion) {
case AMDHSA_COV2:
case AMDHSA_COV3:
case AMDHSA_COV4:
return 24;
@ -209,7 +199,6 @@ unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion) {
unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion) {
switch (CodeObjectVersion) {
case AMDHSA_COV2:
case AMDHSA_COV3:
case AMDHSA_COV4:
return 32;
@ -221,7 +210,6 @@ unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion) {
unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion) {
switch (CodeObjectVersion) {
case AMDHSA_COV2:
case AMDHSA_COV3:
case AMDHSA_COV4:
return 40;
@ -786,54 +774,6 @@ std::string AMDGPUTargetID::toString() const {
std::string Features;
if (STI.getTargetTriple().getOS() == Triple::AMDHSA) {
switch (CodeObjectVersion) {
case AMDGPU::AMDHSA_COV2:
// Code object V2 only supported specific processors and had fixed
// settings for the XNACK.
if (Processor == "gfx600") {
} else if (Processor == "gfx601") {
} else if (Processor == "gfx602") {
} else if (Processor == "gfx700") {
} else if (Processor == "gfx701") {
} else if (Processor == "gfx702") {
} else if (Processor == "gfx703") {
} else if (Processor == "gfx704") {
} else if (Processor == "gfx705") {
} else if (Processor == "gfx801") {
if (!isXnackOnOrAny())
report_fatal_error(
"AMD GPU code object V2 does not support processor " +
Twine(Processor) + " without XNACK");
} else if (Processor == "gfx802") {
} else if (Processor == "gfx803") {
} else if (Processor == "gfx805") {
} else if (Processor == "gfx810") {
if (!isXnackOnOrAny())
report_fatal_error(
"AMD GPU code object V2 does not support processor " +
Twine(Processor) + " without XNACK");
} else if (Processor == "gfx900") {
if (isXnackOnOrAny())
Processor = "gfx901";
} else if (Processor == "gfx902") {
if (isXnackOnOrAny())
Processor = "gfx903";
} else if (Processor == "gfx904") {
if (isXnackOnOrAny())
Processor = "gfx905";
} else if (Processor == "gfx906") {
if (isXnackOnOrAny())
Processor = "gfx907";
} else if (Processor == "gfx90c") {
if (isXnackOnOrAny())
report_fatal_error(
"AMD GPU code object V2 does not support processor " +
Twine(Processor) + " with XNACK being ON or ANY");
} else {
report_fatal_error(
"AMD GPU code object V2 does not support processor " +
Twine(Processor));
}
break;
case AMDGPU::AMDHSA_COV3:
// xnack.
if (isXnackOnOrAny())

View File

@ -43,17 +43,15 @@ namespace AMDGPU {
struct IsaVersion;
/// Named constants for AMDHSA code object versions. Each enumerator's numeric
/// value equals the version number it names (AMDHSA_COVn == n).
enum {
AMDHSA_COV2 = 2,
AMDHSA_COV3 = 3,
AMDHSA_COV4 = 4,
AMDHSA_COV5 = 5
};
/// \returns True if \p STI is AMDHSA.
bool isHsaAbi(const MCSubtargetInfo &STI);
/// \returns HSA OS ABI Version identification.
std::optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 2,
/// false otherwise.
bool isHsaAbiVersion2(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 3,
/// false otherwise.
bool isHsaAbiVersion3(const MCSubtargetInfo *STI);
@ -63,9 +61,6 @@ bool isHsaAbiVersion4(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 5,
/// false otherwise.
bool isHsaAbiVersion5(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 3 and above,
/// false otherwise.
bool isHsaAbiVersion3AndAbove(const MCSubtargetInfo *STI);
/// \returns The offset of the multigrid_sync_arg argument from implicitarg_ptr
unsigned getMultigridSyncArgImplicitArgPosition(unsigned COV);

View File

@ -3,12 +3,10 @@
declare i64 @llvm.amdgcn.dispatch.id() #1
; GCN-LABEL: {{^}}dispatch_id:
; GCN: .amd_kernel_code_t
; GCN: enable_sgpr_dispatch_id = 1
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], s6
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], s7
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[LO]]:[[HI]]]
; GCN: .amdhsa_user_sgpr_dispatch_id 1
define amdgpu_kernel void @dispatch_id(ptr addrspace(1) %out) #0 {
%tmp0 = call i64 @llvm.amdgcn.dispatch.id()
store i64 %tmp0, ptr addrspace(1) %out
@ -19,4 +17,4 @@ attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 200}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}

View File

@ -3,8 +3,8 @@
; FIXME: Error on non-HSA target
; GCN-LABEL: {{^}}test:
; GCN: enable_sgpr_dispatch_ptr = 1
; GCN: s_load_dword s{{[0-9]+}}, s[4:5], 0x0
; GCN: .amdhsa_user_sgpr_dispatch_ptr 1
define amdgpu_kernel void @test(ptr addrspace(1) %out) {
%dispatch_ptr = call noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0
%value = load i32, ptr addrspace(4) %dispatch_ptr
@ -17,4 +17,4 @@ declare noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0
attributes #0 = { readnone }
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 200}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}

View File

@ -1,15 +1,15 @@
; RUN: llc -global-isel -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=CO-V2,HSA,ALL %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=CO-V2,OS-MESA3D,ALL %s
; RUN: llc -global-isel -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=CO-V4,HSA,ALL %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=CO-V4,OS-MESA3D,ALL %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-unknown -mcpu=hawaii -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=OS-UNKNOWN,ALL %s
; ALL-LABEL: {{^}}test:
; CO-V2: enable_sgpr_kernarg_segment_ptr = 1
; HSA: kernarg_segment_byte_size = 8
; HSA: kernarg_segment_alignment = 4
; CO-V2: s_load_dword s{{[0-9]+}}, s[4:5], 0xa
; OS-MESA3D: enable_sgpr_kernarg_segment_ptr = 1
; CO-V4: s_load_dword s{{[0-9]+}}, s[4:5], 0xa
; OS-UNKNOWN: s_load_dword s{{[0-9]+}}, s[0:1], 0xa
; HSA: .amdhsa_kernarg_size 8
; HSA: .amdhsa_user_sgpr_kernarg_segment_ptr 1
define amdgpu_kernel void @test(ptr addrspace(1) %out) #1 {
%kernarg.segment.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
%gep = getelementptr i32, ptr addrspace(4) %kernarg.segment.ptr, i64 10
@ -19,13 +19,14 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out) #1 {
}
; ALL-LABEL: {{^}}test_implicit:
; HSA: kernarg_segment_byte_size = 8
; OS-MESA3D: kernarg_segment_byte_size = 24
; CO-V2: kernarg_segment_alignment = 4
; OS-MESA3D: kernarg_segment_alignment = 4
; 10 + 9 (36 prepended implicit bytes) + 2(out pointer) = 21 = 0x15
; OS-UNKNOWN: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0x15
; HSA: .amdhsa_kernarg_size 8
define amdgpu_kernel void @test_implicit(ptr addrspace(1) %out) #1 {
%implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr i32, ptr addrspace(4) %implicitarg.ptr, i64 10
@ -35,16 +36,16 @@ define amdgpu_kernel void @test_implicit(ptr addrspace(1) %out) #1 {
}
; ALL-LABEL: {{^}}test_implicit_alignment:
; HSA: kernarg_segment_byte_size = 12
; OS-MESA3D: kernarg_segment_byte_size = 28
; CO-V2: kernarg_segment_alignment = 4
; OS-MESA3D: kernarg_segment_alignment = 4
; OS-UNKNOWN: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xc
; HSA: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x4
; OS-MESA3D: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x3
; ALL: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[VAL]]
; ALL: flat_store_dword v[{{[0-9]+:[0-9]+}}], [[V_VAL]]
; HSA: .amdhsa_kernarg_size 12
define amdgpu_kernel void @test_implicit_alignment(ptr addrspace(1) %out, <2 x i8> %in) #1 {
%implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%val = load i32, ptr addrspace(4) %implicitarg.ptr
@ -53,16 +54,16 @@ define amdgpu_kernel void @test_implicit_alignment(ptr addrspace(1) %out, <2 x i
}
; ALL-LABEL: {{^}}opencl_test_implicit_alignment
; HSA: kernarg_segment_byte_size = 64
; OS-MESA3D: kernarg_segment_byte_size = 28
; CO-V2: kernarg_segment_alignment = 4
; OS-MESA3D: kernarg_segment_alignment = 4
; OS-UNKNOWN: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xc
; HSA: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x4
; OS-MESA3D: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x3
; ALL: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[VAL]]
; ALL: flat_store_dword v[{{[0-9]+:[0-9]+}}], [[V_VAL]]
; HSA: .amdhsa_kernarg_size 64
define amdgpu_kernel void @opencl_test_implicit_alignment(ptr addrspace(1) %out, <2 x i8> %in) #2 {
%implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%val = load i32, ptr addrspace(4) %implicitarg.ptr
@ -71,12 +72,15 @@ define amdgpu_kernel void @opencl_test_implicit_alignment(ptr addrspace(1) %out,
}
; ALL-LABEL: {{^}}test_no_kernargs:
; CO-V2: enable_sgpr_kernarg_segment_ptr = 0
; CO-V2: kernarg_segment_byte_size = 0
; CO-V2: kernarg_segment_alignment = 4
; OS-MESA3D: enable_sgpr_kernarg_segment_ptr = 0
; OS-MESA3D: kernarg_segment_byte_size = 0
; OS-MESA3D: kernarg_segment_alignment = 4
; HSA: s_mov_b64 [[OFFSET_NULL:s\[[0-9]+:[0-9]+\]]], 40{{$}}
; HSA: s_load_dword s{{[0-9]+}}, [[OFFSET_NULL]]
; HSA: .amdhsa_kernarg_size 0
; HSA: .amdhsa_user_sgpr_kernarg_segment_ptr 0
define amdgpu_kernel void @test_no_kernargs() #1 {
%kernarg.segment.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
%gep = getelementptr i32, ptr addrspace(4) %kernarg.segment.ptr, i64 10
@ -86,9 +90,9 @@ define amdgpu_kernel void @test_no_kernargs() #1 {
}
; ALL-LABEL: {{^}}opencl_test_implicit_alignment_no_explicit_kernargs:
; HSA: kernarg_segment_byte_size = 48
; OS-MESA3D: kernarg_segment_byte_size = 16
; CO-V2: kernarg_segment_alignment = 4
; OS-MESA3D: kernarg_segment_alignment = 4
; HSA: .amdhsa_kernarg_size 48
define amdgpu_kernel void @opencl_test_implicit_alignment_no_explicit_kernargs() #2 {
%implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%val = load volatile i32, ptr addrspace(4) %implicitarg.ptr
@ -97,9 +101,9 @@ define amdgpu_kernel void @opencl_test_implicit_alignment_no_explicit_kernargs()
}
; ALL-LABEL: {{^}}opencl_test_implicit_alignment_no_explicit_kernargs_round_up:
; HSA: kernarg_segment_byte_size = 40
; OS-MESA3D: kernarg_segment_byte_size = 16
; CO-V2: kernarg_segment_alignment = 4
; OS-MESA3D: kernarg_segment_alignment = 4
; HSA: .amdhsa_kernarg_size 40
define amdgpu_kernel void @opencl_test_implicit_alignment_no_explicit_kernargs_round_up() #3 {
%implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%val = load volatile i32, ptr addrspace(4) %implicitarg.ptr
@ -124,4 +128,4 @@ attributes #2 = { nounwind "amdgpu-implicitarg-num-bytes"="48" }
attributes #3 = { nounwind "amdgpu-implicitarg-num-bytes"="38" }
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 200}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}

View File

@ -3,8 +3,8 @@
; FIXME: Error on non-hsa target
; GCN-LABEL: {{^}}test:
; GCN: enable_sgpr_queue_ptr = 1
; GCN: s_load_dword s{{[0-9]+}}, s[4:5], 0x0
; GCN: .amdhsa_user_sgpr_queue_ptr 1
define amdgpu_kernel void @test(ptr addrspace(1) %out) {
%queue_ptr = call noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr() #0
%value = load i32, ptr addrspace(4) %queue_ptr
@ -17,4 +17,4 @@ declare noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr() #0
attributes #0 = { nounwind readnone }
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 200}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}

View File

@ -1,9 +1,7 @@
; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -global-isel -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck --check-prefixes=ALL,CO-V2 %s
; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -global-isel -mtriple=amdgcn-unknown-amdhsa -mcpu=carrizo -verify-machineinstrs | FileCheck --check-prefixes=ALL,CO-V2 %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-- -mcpu=hawaii -verify-machineinstrs | FileCheck --check-prefixes=ALL,UNKNOWN-OS %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-- -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefixes=ALL,UNKNOWN-OS %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=hawaii -verify-machineinstrs | FileCheck -check-prefixes=ALL,CO-V2 %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefixes=ALL,CO-V2 %s
; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=hawaii -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,UNKNOWN-OS %s
; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=tonga -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,UNKNOWN-OS %s
; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,MESA3D %s
; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,MESA3D %s
declare i32 @llvm.amdgcn.workgroup.id.x() #0
declare i32 @llvm.amdgcn.workgroup.id.y() #0
@ -11,25 +9,25 @@ declare i32 @llvm.amdgcn.workgroup.id.z() #0
; ALL-LABEL: {{^}}test_workgroup_id_x:
; CO-V2: .amd_kernel_code_t
; CO-V2: user_sgpr_count = 6
; CO-V2: enable_sgpr_workgroup_id_x = 1
; CO-V2: enable_sgpr_workgroup_id_y = 0
; CO-V2: enable_sgpr_workgroup_id_z = 0
; CO-V2: enable_sgpr_workgroup_info = 0
; CO-V2: enable_vgpr_workitem_id = 0
; CO-V2: enable_sgpr_grid_workgroup_count_x = 0
; CO-V2: enable_sgpr_grid_workgroup_count_y = 0
; CO-V2: enable_sgpr_grid_workgroup_count_z = 0
; CO-V2: .end_amd_kernel_code_t
; MESA3D: .amd_kernel_code_t
; MESA3D: user_sgpr_count = 6
; MESA3D: enable_sgpr_workgroup_id_x = 1
; MESA3D: enable_sgpr_workgroup_id_y = 0
; MESA3D: enable_sgpr_workgroup_id_z = 0
; MESA3D: enable_sgpr_workgroup_info = 0
; MESA3D: enable_vgpr_workitem_id = 0
; MESA3D: enable_sgpr_grid_workgroup_count_x = 0
; MESA3D: enable_sgpr_grid_workgroup_count_y = 0
; MESA3D: enable_sgpr_grid_workgroup_count_z = 0
; MESA3D: .end_amd_kernel_code_t
; UNKNOWN-OS: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s2{{$}}
; CO-V2: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s6{{$}}
; MESA3D: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s6{{$}}
; ALL: {{buffer|flat}}_store_dword {{.*}}[[VCOPY]]
; CO-V2: COMPUTE_PGM_RSRC2:USER_SGPR: 6
; ALL-NOCO-V2: COMPUTE_PGM_RSRC2:USER_SGPR: 2
; MESA3D: COMPUTE_PGM_RSRC2:USER_SGPR: 6
; ALL-NOMESA3D: COMPUTE_PGM_RSRC2:USER_SGPR: 2
; ALL: COMPUTE_PGM_RSRC2:TGID_X_EN: 1
; ALL: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
; ALL: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0
@ -41,22 +39,22 @@ define amdgpu_kernel void @test_workgroup_id_x(ptr addrspace(1) %out) #1 {
}
; ALL-LABEL: {{^}}test_workgroup_id_y:
; CO-V2: user_sgpr_count = 6
; CO-V2: enable_sgpr_workgroup_id_x = 1
; CO-V2: enable_sgpr_workgroup_id_y = 1
; CO-V2: enable_sgpr_workgroup_id_z = 0
; CO-V2: enable_sgpr_workgroup_info = 0
; CO-V2: enable_sgpr_grid_workgroup_count_x = 0
; CO-V2: enable_sgpr_grid_workgroup_count_y = 0
; CO-V2: enable_sgpr_grid_workgroup_count_z = 0
; MESA3D: user_sgpr_count = 6
; MESA3D: enable_sgpr_workgroup_id_x = 1
; MESA3D: enable_sgpr_workgroup_id_y = 1
; MESA3D: enable_sgpr_workgroup_id_z = 0
; MESA3D: enable_sgpr_workgroup_info = 0
; MESA3D: enable_sgpr_grid_workgroup_count_x = 0
; MESA3D: enable_sgpr_grid_workgroup_count_y = 0
; MESA3D: enable_sgpr_grid_workgroup_count_z = 0
; UNKNOWN-OS: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s3{{$}}
; HSA: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s7{{$}}
; ALL: {{buffer|flat}}_store_dword {{.*}}[[VCOPY]]
; CO-V2: COMPUTE_PGM_RSRC2:USER_SGPR: 6
; ALL-NOCO-V2: COMPUTE_PGM_RSRC2:USER_SGPR: 2
; MESA3D: COMPUTE_PGM_RSRC2:USER_SGPR: 6
; ALL-NOMESA3D: COMPUTE_PGM_RSRC2:USER_SGPR: 2
; ALL: COMPUTE_PGM_RSRC2:TGID_X_EN: 1
; ALL: COMPUTE_PGM_RSRC2:TGID_Y_EN: 1
; ALL: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0
@ -68,30 +66,30 @@ define amdgpu_kernel void @test_workgroup_id_y(ptr addrspace(1) %out) #1 {
}
; ALL-LABEL: {{^}}test_workgroup_id_z:
; CO-V2: user_sgpr_count = 6
; CO-V2: enable_sgpr_workgroup_id_x = 1
; CO-V2: enable_sgpr_workgroup_id_y = 0
; CO-V2: enable_sgpr_workgroup_id_z = 1
; CO-V2: enable_sgpr_workgroup_info = 0
; CO-V2: enable_vgpr_workitem_id = 0
; CO-V2: enable_sgpr_private_segment_buffer = 1
; CO-V2: enable_sgpr_dispatch_ptr = 0
; CO-V2: enable_sgpr_queue_ptr = 0
; CO-V2: enable_sgpr_kernarg_segment_ptr = 1
; CO-V2: enable_sgpr_dispatch_id = 0
; CO-V2: enable_sgpr_flat_scratch_init = 0
; CO-V2: enable_sgpr_private_segment_size = 0
; CO-V2: enable_sgpr_grid_workgroup_count_x = 0
; CO-V2: enable_sgpr_grid_workgroup_count_y = 0
; CO-V2: enable_sgpr_grid_workgroup_count_z = 0
; MESA3D: user_sgpr_count = 6
; MESA3D: enable_sgpr_workgroup_id_x = 1
; MESA3D: enable_sgpr_workgroup_id_y = 0
; MESA3D: enable_sgpr_workgroup_id_z = 1
; MESA3D: enable_sgpr_workgroup_info = 0
; MESA3D: enable_vgpr_workitem_id = 0
; MESA3D: enable_sgpr_private_segment_buffer = 1
; MESA3D: enable_sgpr_dispatch_ptr = 0
; MESA3D: enable_sgpr_queue_ptr = 0
; MESA3D: enable_sgpr_kernarg_segment_ptr = 1
; MESA3D: enable_sgpr_dispatch_id = 0
; MESA3D: enable_sgpr_flat_scratch_init = 0
; MESA3D: enable_sgpr_private_segment_size = 0
; MESA3D: enable_sgpr_grid_workgroup_count_x = 0
; MESA3D: enable_sgpr_grid_workgroup_count_y = 0
; MESA3D: enable_sgpr_grid_workgroup_count_z = 0
; UNKNOWN-OS: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s3{{$}}
; HSA: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s7{{$}}
; ALL: {{buffer|flat}}_store_dword {{.*}}[[VCOPY]]
; CO-V2: COMPUTE_PGM_RSRC2:USER_SGPR: 6
; ALL-NOCO-V2: COMPUTE_PGM_RSRC2:USER_SGPR: 2
; MESA3D: COMPUTE_PGM_RSRC2:USER_SGPR: 6
; ALL-NOMESA3D: COMPUTE_PGM_RSRC2:USER_SGPR: 2
; ALL: COMPUTE_PGM_RSRC2:TGID_X_EN: 1
; ALL: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
; ALL: COMPUTE_PGM_RSRC2:TGID_Z_EN: 1
@ -106,4 +104,4 @@ attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}

View File

@ -1,11 +1,11 @@
; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -global-isel -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck --check-prefixes=ALL,HSA,CO-V2,UNPACKED %s
; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -global-isel -mtriple=amdgcn-unknown-amdhsa -mcpu=carrizo -verify-machineinstrs | FileCheck --check-prefixes=ALL,HSA,CO-V2,UNPACKED %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-- -mcpu=hawaii -mattr=+flat-for-global -verify-machineinstrs | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-- -mcpu=tonga -mattr=+flat-for-global -verify-machineinstrs | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mattr=+flat-for-global -mcpu=hawaii -verify-machineinstrs | FileCheck -check-prefixes=ALL,CO-V2,UNPACKED %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefixes=ALL,CO-V2,UNPACKED %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -verify-machineinstrs | FileCheck -check-prefixes=ALL,PACKED-TID %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 | FileCheck -check-prefixes=ALL,PACKED-TID %s
; RUN: llc -global-isel -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,HSA,UNPACKED %s
; RUN: llc -global-isel -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,HSA,UNPACKED %s
; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=hawaii -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s
; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=tonga -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s
; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mattr=+flat-for-global -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,MESA3D,UNPACKED %s
; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,MESA3D,UNPACKED %s
; RUN: llc -global-isel -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,PACKED-TID %s
; RUN: llc -global-isel -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=ALL,PACKED-TID %s
declare i32 @llvm.amdgcn.workitem.id.x() #0
declare i32 @llvm.amdgcn.workitem.id.y() #0
@ -16,7 +16,7 @@ declare i32 @llvm.amdgcn.workitem.id.z() #0
; MESA-NEXT: .long 132{{$}}
; ALL-LABEL: {{^}}test_workitem_id_x:
; CO-V2: enable_vgpr_workitem_id = 0
; MESA3D: enable_vgpr_workitem_id = 0
; ALL-NOT: v0
; ALL: {{buffer|flat|global}}_store_{{dword|b32}} {{.*}}v0
@ -33,9 +33,9 @@ define amdgpu_kernel void @test_workitem_id_x(ptr addrspace(1) %out) #1 {
; MESA-NEXT: .long 2180{{$}}
; ALL-LABEL: {{^}}test_workitem_id_y:
; CO-V2: enable_vgpr_workitem_id = 1
; CO-V2-NOT: v1
; CO-V2: {{buffer|flat}}_store_dword {{.*}}v1
; MESA3D: enable_vgpr_workitem_id = 1
; MESA3D-NOT: v1
; MESA3D: {{buffer|flat}}_store_dword {{.*}}v1
; PACKED-TID: v_bfe_u32 [[ID:v[0-9]+]], v0, 10, 10
; PACKED-TID: {{buffer|flat|global}}_store_{{dword|b32}} {{.*}}[[ID]]
@ -51,9 +51,9 @@ define amdgpu_kernel void @test_workitem_id_y(ptr addrspace(1) %out) #1 {
; MESA-NEXT: .long 4228{{$}}
; ALL-LABEL: {{^}}test_workitem_id_z:
; CO-V2: enable_vgpr_workitem_id = 2
; CO-V2-NOT: v2
; CO-V2: {{buffer|flat}}_store_dword {{.*}}v2
; MESA3D: enable_vgpr_workitem_id = 2
; MESA3D-NOT: v2
; MESA3D: {{buffer|flat}}_store_dword {{.*}}v2
; PACKED-TID: v_bfe_u32 [[ID:v[0-9]+]], v0, 20, 10
; PACKED-TID: {{buffer|flat|global}}_store_{{dword|b32}} {{.*}}[[ID]]
@ -129,7 +129,7 @@ define void @test_workitem_id_z_func(ptr addrspace(1) %out) #1 {
; FIXME: Should be able to avoid enabling in kernel inputs
; FIXME: Packed tid should avoid the and
; ALL-LABEL: {{^}}test_reqd_workgroup_size_x_only:
; CO-V2: enable_vgpr_workitem_id = 0
; MESA3D: enable_vgpr_workitem_id = 0
; ALL-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; UNPACKED-DAG: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
@ -150,7 +150,7 @@ define amdgpu_kernel void @test_reqd_workgroup_size_x_only(ptr %out) !reqd_work_
}
; ALL-LABEL: {{^}}test_reqd_workgroup_size_y_only:
; CO-V2: enable_vgpr_workitem_id = 1
; MESA3D: enable_vgpr_workitem_id = 1
; ALL: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; ALL: flat_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]]
@ -172,7 +172,7 @@ define amdgpu_kernel void @test_reqd_workgroup_size_y_only(ptr %out) !reqd_work_
}
; ALL-LABEL: {{^}}test_reqd_workgroup_size_z_only:
; CO-V2: enable_vgpr_workitem_id = 2
; MESA3D: enable_vgpr_workitem_id = 2
; ALL: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; ALL: flat_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]]
@ -200,4 +200,4 @@ attributes #1 = { nounwind }
!2 = !{i32 1, i32 1, i32 64}
!llvm.module.flags = !{!99}
!99 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION}
!99 = !{i32 1, !"amdgpu_code_object_version", i32 400}

View File

@ -2,10 +2,6 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=HSA -check-prefix=GFX9 %s
; HSA-LABEL: {{^}}use_group_to_flat_addrspacecast:
; HSA: enable_sgpr_private_segment_buffer = 1
; HSA: enable_sgpr_dispatch_ptr = 0
; CI: enable_sgpr_queue_ptr = 1
; GFX9: enable_sgpr_queue_ptr = 0
; CI-DAG: s_load_dword [[PTR:s[0-9]+]], s[6:7], 0x0{{$}}
; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10{{$}}
@ -24,6 +20,11 @@
; HSA: flat_store_dword v[[[LO]]:[[HI]]], [[K]]
; HSA: .amdhsa_user_sgpr_private_segment_buffer 1
; HSA: .amdhsa_user_sgpr_dispatch_ptr 0
; CI: .amdhsa_user_sgpr_queue_ptr 1
; GFX9: .amdhsa_user_sgpr_queue_ptr 0
; At most 2 digits. Make sure src_shared_base is not counted as a high
; number SGPR.
@ -59,10 +60,6 @@ define void @use_group_to_flat_addrspacecast_func(ptr addrspace(3) %ptr) #0 {
}
; HSA-LABEL: {{^}}use_private_to_flat_addrspacecast:
; HSA: enable_sgpr_private_segment_buffer = 1
; HSA: enable_sgpr_dispatch_ptr = 0
; CI: enable_sgpr_queue_ptr = 1
; GFX9: enable_sgpr_queue_ptr = 0
; CI-DAG: s_load_dword [[PTR:s[0-9]+]], s[6:7], 0x0{{$}}
; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x11{{$}}
@ -82,6 +79,11 @@ define void @use_group_to_flat_addrspacecast_func(ptr addrspace(3) %ptr) #0 {
; HSA: flat_store_dword v[[[LO]]:[[HI]]], [[K]]
; HSA: .amdhsa_user_sgpr_private_segment_buffer 1
; HSA: .amdhsa_user_sgpr_dispatch_ptr 0
; CI: .amdhsa_user_sgpr_queue_ptr 1
; GFX9: .amdhsa_user_sgpr_queue_ptr 0
; HSA: NumSgprs: {{[0-9]+}}
define amdgpu_kernel void @use_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) #0 {
%stof = addrspacecast ptr addrspace(5) %ptr to ptr
@ -91,13 +93,14 @@ define amdgpu_kernel void @use_private_to_flat_addrspacecast(ptr addrspace(5) %p
; no-op
; HSA-LABEL: {{^}}use_global_to_flat_addrspacecast:
; HSA: enable_sgpr_queue_ptr = 0
; HSA: s_load_dwordx2 s[[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]]
; HSA-DAG: v_mov_b32_e32 v[[VPTRLO:[0-9]+]], s[[PTRLO]]
; HSA-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]]
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
; HSA: flat_store_dword v[[[VPTRLO]]:[[VPTRHI]]], [[K]]
; HSA: .amdhsa_user_sgpr_queue_ptr 0
define amdgpu_kernel void @use_global_to_flat_addrspacecast(ptr addrspace(1) %ptr) #0 {
%stof = addrspacecast ptr addrspace(1) %ptr to ptr
store volatile i32 7, ptr %stof
@ -131,9 +134,6 @@ define amdgpu_kernel void @use_constant_to_global_addrspacecast(ptr addrspace(4)
}
; HSA-LABEL: {{^}}use_flat_to_group_addrspacecast:
; HSA: enable_sgpr_private_segment_buffer = 1
; HSA: enable_sgpr_dispatch_ptr = 0
; HSA: enable_sgpr_queue_ptr = 0
; HSA: s_load_dwordx2 s[[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]]
; CI-DAG: v_cmp_ne_u64_e64 s[[[CMP_LO:[0-9]+]]:[[CMP_HI:[0-9]+]]], s[[[PTR_LO]]:[[PTR_HI]]], 0{{$}}
@ -146,6 +146,10 @@ define amdgpu_kernel void @use_constant_to_global_addrspacecast(ptr addrspace(4)
; GFX9-DAG: v_mov_b32_e32 [[CASTPTR:v[0-9]+]], s[[PTR_LO]]
; CI-DAG: ds_write_b32 [[VCASTPTR]], v[[K]]
; GFX9-DAG: ds_write_b32 [[CASTPTR]], v[[K]]
; HSA: .amdhsa_user_sgpr_private_segment_buffer 1
; HSA: .amdhsa_user_sgpr_dispatch_ptr 0
; HSA: .amdhsa_user_sgpr_queue_ptr 0
define amdgpu_kernel void @use_flat_to_group_addrspacecast(ptr %ptr) #0 {
%ftos = addrspacecast ptr %ptr to ptr addrspace(3)
store volatile i32 0, ptr addrspace(3) %ftos
@ -153,9 +157,6 @@ define amdgpu_kernel void @use_flat_to_group_addrspacecast(ptr %ptr) #0 {
}
; HSA-LABEL: {{^}}use_flat_to_private_addrspacecast:
; HSA: enable_sgpr_private_segment_buffer = 1
; HSA: enable_sgpr_dispatch_ptr = 0
; HSA: enable_sgpr_queue_ptr = 0
; HSA: s_load_dwordx2 s[[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]]
; CI-DAG v_cmp_ne_u64_e64 vcc, s[[[PTR_LO]]:[[PTR_HI]]], 0{{$}}
@ -171,6 +172,10 @@ define amdgpu_kernel void @use_flat_to_group_addrspacecast(ptr %ptr) #0 {
; GFX9-DAG: v_mov_b32_e32 [[CASTPTR:v[0-9]+]], s[[PTR_LO]]
; CI: buffer_store_dword v[[K]], [[VCASTPTR]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen{{$}}
; GFX9: buffer_store_dword v[[K]], [[CASTPTR]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen{{$}}
; HSA: .amdhsa_user_sgpr_private_segment_buffer 1
; HSA: .amdhsa_user_sgpr_dispatch_ptr 0
; HSA: .amdhsa_user_sgpr_queue_ptr 0
define amdgpu_kernel void @use_flat_to_private_addrspacecast(ptr %ptr) #0 {
%ftos = addrspacecast ptr %ptr to ptr addrspace(5)
store volatile i32 0, ptr addrspace(5) %ftos
@ -178,7 +183,6 @@ define amdgpu_kernel void @use_flat_to_private_addrspacecast(ptr %ptr) #0 {
}
; HSA-LABEL: {{^}}use_flat_to_global_addrspacecast:
; HSA: enable_sgpr_queue_ptr = 0
; HSA: s_load_dwordx2 s[[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]], s[4:5], 0x0
; CI-DAG: v_mov_b32_e32 v[[VPTRLO:[0-9]+]], s[[PTRLO]]
@ -188,6 +192,8 @@ define amdgpu_kernel void @use_flat_to_private_addrspacecast(ptr %ptr) #0 {
; GFX9: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
; GFX9: global_store_dword [[ZERO]], [[ZERO]], s[[[PTRLO]]:[[PTRHI]]{{\]$}}
; HSA: .amdhsa_user_sgpr_queue_ptr 0
define amdgpu_kernel void @use_flat_to_global_addrspacecast(ptr %ptr) #0 {
%ftos = addrspacecast ptr %ptr to ptr addrspace(1)
store volatile i32 0, ptr addrspace(1) %ftos
@ -195,10 +201,11 @@ define amdgpu_kernel void @use_flat_to_global_addrspacecast(ptr %ptr) #0 {
}
; HSA-LABEL: {{^}}use_flat_to_constant_addrspacecast:
; HSA: enable_sgpr_queue_ptr = 0
; HSA: s_load_dwordx2 s[[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]], s[4:5], 0x0
; HSA: s_load_dword s{{[0-9]+}}, s[[[PTRLO]]:[[PTRHI]]], 0x0
; HSA: .amdhsa_user_sgpr_queue_ptr 0
define amdgpu_kernel void @use_flat_to_constant_addrspacecast(ptr %ptr) #0 {
%ftos = addrspacecast ptr %ptr to ptr addrspace(4)
load volatile i32, ptr addrspace(4) %ftos
@ -279,13 +286,14 @@ define amdgpu_kernel void @cast_0_flat_to_private_addrspacecast() #0 {
; HSA-LABEL: {{^}}cast_neg1_private_to_flat_addrspacecast:
; CI: enable_sgpr_queue_ptr = 1
; GFX9: enable_sgpr_queue_ptr = 0
; HSA: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; HSA: {{flat|global}}_store_dword v[[[LO]]:[[HI]]], v[[K]]
; CI: .amdhsa_user_sgpr_queue_ptr 1
; GFX9: .amdhsa_user_sgpr_queue_ptr 0
define amdgpu_kernel void @cast_neg1_private_to_flat_addrspacecast() #0 {
%cast = addrspacecast ptr addrspace(5) inttoptr (i32 -1 to ptr addrspace(5)) to ptr
store volatile i32 7, ptr %cast
@ -416,4 +424,4 @@ attributes #2 = { nounwind readnone }
attributes #3 = { nounwind "amdgpu-32bit-address-high-bits"="0xffff8000" }
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 200}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}

View File

@ -1,16 +1,16 @@
; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -show-mc-encoding -mattr=+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s
; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -show-mc-encoding -mattr=+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-unaligned-access-mode | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-PROMOTE %s
; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -show-mc-encoding -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC
; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -show-mc-encoding -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -mcpu=kaveri -mattr=-unaligned-access-mode | FileCheck -enable-var-scope -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-ALLOCA %s
; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -show-mc-encoding -mattr=+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn -mcpu=tonga -mattr=-unaligned-access-mode | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s
; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -show-mc-encoding -mattr=+promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn -mcpu=tonga -mattr=-unaligned-access-mode | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE-VECT -check-prefix=SI -check-prefix=FUNC %s
; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -show-mc-encoding -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn -mcpu=tonga -mattr=-unaligned-access-mode | FileCheck -enable-var-scope -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc < %s -show-mc-encoding -mattr=+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc < %s -show-mc-encoding -mattr=+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-unaligned-access-mode | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-PROMOTE %s
; RUN: llc < %s -show-mc-encoding -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC
; RUN: llc < %s -show-mc-encoding -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -mcpu=kaveri -mattr=-unaligned-access-mode | FileCheck -enable-var-scope -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-ALLOCA %s
; RUN: llc < %s -show-mc-encoding -mattr=+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -mcpu=tonga -mattr=-unaligned-access-mode | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc < %s -show-mc-encoding -mattr=+promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -mcpu=tonga -mattr=-unaligned-access-mode | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE-VECT -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc < %s -show-mc-encoding -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -mcpu=tonga -mattr=-unaligned-access-mode | FileCheck -enable-var-scope -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | opt -S -mtriple=amdgcn-unknown-amdhsa -data-layout=A5 -mcpu=kaveri -passes=amdgpu-promote-alloca -disable-promote-alloca-to-vector | FileCheck -enable-var-scope -check-prefix=HSAOPT -check-prefix=OPT %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | opt -S -mtriple=amdgcn-unknown-unknown -data-layout=A5 -mcpu=kaveri -passes=amdgpu-promote-alloca -disable-promote-alloca-to-vector | FileCheck -enable-var-scope -check-prefix=NOHSAOPT -check-prefix=OPT %s
; RUN: opt < %s -S -mtriple=amdgcn-unknown-amdhsa -data-layout=A5 -mcpu=kaveri -passes=amdgpu-promote-alloca -disable-promote-alloca-to-vector | FileCheck -enable-var-scope -check-prefix=HSAOPT -check-prefix=OPT %s
; RUN: opt < %s -S -mtriple=amdgcn-unknown-unknown -data-layout=A5 -mcpu=kaveri -passes=amdgpu-promote-alloca -disable-promote-alloca-to-vector | FileCheck -enable-var-scope -check-prefix=NOHSAOPT -check-prefix=OPT %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=r600 -mcpu=cypress -disable-promote-alloca-to-vector | FileCheck %s -check-prefix=R600 -check-prefix=FUNC
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=r600 -mcpu=cypress | FileCheck %s -check-prefix=R600-VECT -check-prefix=FUNC
; RUN: llc < %s -march=r600 -mcpu=cypress -disable-promote-alloca-to-vector | FileCheck %s -check-prefix=R600 -check-prefix=FUNC
; RUN: llc < %s -march=r600 -mcpu=cypress | FileCheck %s -check-prefix=R600-VECT -check-prefix=FUNC
; HSAOPT: @mova_same_clause.stack = internal unnamed_addr addrspace(3) global [256 x [5 x i32]] poison, align 4
; HSAOPT: @high_alignment.stack = internal unnamed_addr addrspace(3) global [256 x [8 x i32]] poison, align 16
@ -24,9 +24,7 @@
; R600: LDS_READ
; R600: LDS_READ
; HSA-PROMOTE: .amd_kernel_code_t
; HSA-PROMOTE: workgroup_group_segment_byte_size = 5120
; HSA-PROMOTE: .end_amd_kernel_code_t
; HSA-PROMOTE: .amdhsa_group_segment_fixed_size 5120
; HSA-PROMOTE: s_load_dwordx2 s[{{[0-9:]+}}], s[4:5], 0x1
@ -35,14 +33,12 @@
; SI-PROMOTE: ds_read_b32
; SI-PROMOTE: ds_read_b32
; HSA-ALLOCA: .amd_kernel_code_t
; FIXME: Creating the emergency stack slots causes us to over-estimate scratch
; by 4 bytes.
; HSA-ALLOCA: workitem_private_segment_byte_size = 24
; HSA-ALLOCA: .end_amd_kernel_code_t
; HSA-ALLOCA: .amdhsa_private_segment_fixed_size 24
; HSA-ALLOCA: s_mov_b32 flat_scratch_lo, s7
; HSA-ALLOCA: s_add_i32 s6, s6, s9
; HSA-ALLOCA: s_mov_b32 flat_scratch_lo, s7
; HSA-ALLOCA: s_lshr_b32 flat_scratch_hi, s6, 8
; SI-ALLOCA: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen ; encoding: [0x00,0x10,0x70,0xe0
@ -534,7 +530,7 @@ attributes #0 = { nounwind "amdgpu-waves-per-eu"="1,2" "amdgpu-flat-work-group-s
attributes #1 = { nounwind "amdgpu-flat-work-group-size"="1,256" }
!llvm.module.flags = !{!99}
!99 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION}
!99 = !{i32 1, !"amdgpu_code_object_version", i32 400}
; HSAOPT: !1 = !{}
; HSAOPT: !2 = !{i32 0, i32 257}

View File

@ -130,16 +130,14 @@ define amdgpu_kernel void @min_1024_max_1024() #3 {
attributes #3 = {"amdgpu-flat-work-group-size"="1024,1024"}
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 200}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
; HSAMD: NT_AMD_HSA_METADATA (AMD HSA Metadata)
; HSAMD: Version: [ 1, 0 ]
; HSAMD: Kernels:
; HSAMD: - Name: min_64_max_64
; HSAMD: MaxFlatWorkGroupSize: 64
; HSAMD: - Name: min_64_max_128
; HSAMD: MaxFlatWorkGroupSize: 128
; HSAMD: - Name: min_128_max_128
; HSAMD: MaxFlatWorkGroupSize: 128
; HSAMD: - Name: min_1024_max_1024
; HSAMD: MaxFlatWorkGroupSize: 1024
; HSAMD: amdhsa.kernels
; HSAMD: .max_flat_workgroup_size: 64
; HSAMD: .name: min_64_max_64
; HSAMD: .max_flat_workgroup_size: 128
; HSAMD: .name: min_64_max_128
; HSAMD: .max_flat_workgroup_size: 128
; HSAMD: .name: min_128_max_128
; HSAMD: .max_flat_workgroup_size: 1024
; HSAMD: .name: min_1024_max_1024

View File

@ -1,7 +1,7 @@
; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn-amd-amdhsa -enable-ipra=0 -verify-machineinstrs | FileCheck -check-prefixes=GCN,CI %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -enable-ipra=0 -verify-machineinstrs | FileCheck -check-prefixes=GCN,CI %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -enable-ipra=0 -verify-machineinstrs | FileCheck -check-prefixes=GCN-V5 %s
; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -enable-ipra=0 -verify-machineinstrs | FileCheck -check-prefixes=GCN,VI,VI-NOBUG %s
; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=iceland -enable-ipra=0 -verify-machineinstrs | FileCheck -check-prefixes=GCN,VI,VI-BUG %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -enable-ipra=0 -verify-machineinstrs | FileCheck -check-prefixes=GCN,VI,VI-NOBUG %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=iceland -enable-ipra=0 -verify-machineinstrs | FileCheck -check-prefixes=GCN,VI,VI-BUG %s
; Make sure to run a GPU with the SGPR allocation bug.
@ -32,7 +32,6 @@ define void @indirect_use_vcc() #1 {
}
; GCN-LABEL: {{^}}indirect_2level_use_vcc_kernel:
; GCN: is_dynamic_callstack = 0
; CI: ; NumSgprs: 38
; VI-NOBUG: ; NumSgprs: 40
; VI-BUG: ; NumSgprs: 96
@ -61,7 +60,6 @@ define void @indirect_use_flat_scratch() #1 {
}
; GCN-LABEL: {{^}}indirect_2level_use_flat_scratch_kernel:
; GCN: is_dynamic_callstack = 0
; CI: ; NumSgprs: 38
; VI-NOBUG: ; NumSgprs: 40
; VI-BUG: ; NumSgprs: 96
@ -87,7 +85,6 @@ define void @indirect_use_10_vgpr() #0 {
}
; GCN-LABEL: {{^}}indirect_2_level_use_10_vgpr:
; GCN: is_dynamic_callstack = 0
; GCN: ; NumVgprs: 41
define amdgpu_kernel void @indirect_2_level_use_10_vgpr() #0 {
call void @indirect_use_10_vgpr()
@ -123,7 +120,6 @@ define void @indirect_use_80_sgpr() #1 {
}
; GCN-LABEL: {{^}}indirect_2_level_use_80_sgpr:
; GCN: is_dynamic_callstack = 0
; CI: ; NumSgprs: 84
; VI-NOBUG: ; NumSgprs: 86
; VI-BUG: ; NumSgprs: 96
@ -159,7 +155,6 @@ define void @indirect_use_stack() #1 {
}
; GCN-LABEL: {{^}}indirect_2_level_use_stack:
; GCN: is_dynamic_callstack = 0
; GCN: ScratchSize: 2132
define amdgpu_kernel void @indirect_2_level_use_stack() #0 {
call void @indirect_use_stack()
@ -169,7 +164,6 @@ define amdgpu_kernel void @indirect_2_level_use_stack() #0 {
; Should be maximum of callee usage
; GCN-LABEL: {{^}}multi_call_use_use_stack:
; GCN: is_dynamic_callstack = 0
; GCN: ScratchSize: 2052
define amdgpu_kernel void @multi_call_use_use_stack() #0 {
call void @use_stack0()
@ -181,7 +175,6 @@ define amdgpu_kernel void @multi_call_use_use_stack() #0 {
declare void @external() #0
; GCN-LABEL: {{^}}usage_external:
; GCN: is_dynamic_callstack = 1
; NumSgprs: 48
; NumVgprs: 24
; GCN: ScratchSize: 16384
@ -196,7 +189,6 @@ define amdgpu_kernel void @usage_external() #0 {
declare void @external_recurse() #2
; GCN-LABEL: {{^}}usage_external_recurse:
; GCN: is_dynamic_callstack = 1
; NumSgprs: 48
; NumVgprs: 24
; GCN: ScratchSize: 16384
@ -229,9 +221,7 @@ ret:
}
; GCN-LABEL: {{^}}usage_direct_recursion:
; GCN: is_ptr64 = 1
; GCN: is_dynamic_callstack = 1
; GCN: workitem_private_segment_byte_size = 18448{{$}}
; GCN: .amdhsa_private_segment_fixed_size 18448
;
; GCN-V5-LABEL: {{^}}usage_direct_recursion:
; GCN-V5: .amdhsa_private_segment_fixed_size 2064{{$}}

View File

@ -111,7 +111,6 @@ define void @use_workitem_id_yz() #1 {
}
; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_x:
; GCN: enable_vgpr_workitem_id = 0
; FIXEDABI-NOT: v0
; FIXEDABI-NOT: v31
@ -120,13 +119,14 @@ define void @use_workitem_id_yz() #1 {
; FIXEDABI-NOT: v31
; GCN: s_swappc_b64
; GCN: .amdhsa_system_vgpr_workitem_id 0
define amdgpu_kernel void @kern_indirect_use_workitem_id_x() #1 {
call void @use_workitem_id_x()
ret void
}
; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_y:
; GCN: enable_vgpr_workitem_id = 1
; FIXEDABI-NOT: v0
; FIXEDABI-NOT: v1
@ -137,13 +137,14 @@ define amdgpu_kernel void @kern_indirect_use_workitem_id_x() #1 {
; FIXEDABI-NOT: v2
; GCN: s_swappc_b64
; GCN: .amdhsa_system_vgpr_workitem_id 1
define amdgpu_kernel void @kern_indirect_use_workitem_id_y() #1 {
call void @use_workitem_id_y()
ret void
}
; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_z:
; GCN: enable_vgpr_workitem_id = 2
; FIXEDABI-NOT: v0
; FIXEDABI-NOT: v1
@ -152,6 +153,8 @@ define amdgpu_kernel void @kern_indirect_use_workitem_id_y() #1 {
; FIXEDABI-NOT: v1
; GCN: s_swappc_b64
; GCN: .amdhsa_system_vgpr_workitem_id 2
define amdgpu_kernel void @kern_indirect_use_workitem_id_z() #1 {
call void @use_workitem_id_z()
ret void
@ -284,13 +287,14 @@ define void @other_arg_use_workitem_id_z(i32 %arg0) #1 {
; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_x:
; GCN: enable_vgpr_workitem_id = 0
; FIXEDABI-NOT: v0
; FIXEDABI: v_mov_b32_e32 v31, v0
; FIXEDABI: v_mov_b32_e32 v0, 0x22b
; GCN: s_swappc_b64
; GCN: .amdhsa_system_vgpr_workitem_id 0
define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_x() #1 {
call void @other_arg_use_workitem_id_x(i32 555)
ret void
@ -298,26 +302,28 @@ define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_x() #1 {
; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_y:
; GCN: enable_vgpr_workitem_id = 1
; FIXEDABI-NOT: v0
; FIXEDABI-NOT: v1
; FIXEDABI-NOT: v2
; FIXEDABI: v_lshlrev_b32_e32 v31, 10, v1
; FIXEDABI: v_mov_b32_e32 v0, 0x22b
; GCN: .amdhsa_system_vgpr_workitem_id 1
define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_y() #1 {
call void @other_arg_use_workitem_id_y(i32 555)
ret void
}
; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_z:
; GCN: enable_vgpr_workitem_id = 2
; FIXEDABI-NOT: v0
; FIXEDABI-NOT: v1
; FIXEDABI-NOT: v2
; FIXEDABI: v_lshlrev_b32_e32 v31, 20, v2
; FIXEDABI: v_mov_b32_e32 v0, 0x22b
; GCN: .amdhsa_system_vgpr_workitem_id 2
define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_z() #1 {
call void @other_arg_use_workitem_id_z(i32 555)
ret void
@ -374,7 +380,6 @@ define void @too_many_args_use_workitem_id_x(
}
; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x:
; GCN: enable_vgpr_workitem_id = 0
; FIXEDABI-NOT: v0
; FIXEDABI-NOT: v1
@ -385,6 +390,8 @@ define void @too_many_args_use_workitem_id_x(
; FIXEDABI-DAG: v_mov_b32_e32 v31, v0
; FIXEDABI: s_swappc_b64
; GCN: .amdhsa_system_vgpr_workitem_id 0
define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x() #1 {
call void @too_many_args_use_workitem_id_x(
i32 10, i32 20, i32 30, i32 40,
@ -639,7 +646,6 @@ define void @too_many_args_use_workitem_id_xyz(
}
; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_xyz:
; GCN: enable_vgpr_workitem_id = 2
; GCN-DAG: s_mov_b32 s32, 0
@ -652,6 +658,8 @@ define void @too_many_args_use_workitem_id_xyz(
; FIXEDABI-DAG: v_or_b32_e32 v31, [[TMP2]], [[TMP0]]
; GCN: s_swappc_b64
; GCN: .amdhsa_system_vgpr_workitem_id 2
define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_xyz() #1 {
call void @too_many_args_use_workitem_id_xyz(
i32 10, i32 20, i32 30, i32 40,
@ -729,7 +737,6 @@ define void @too_many_args_use_workitem_id_x_stack_yz(
}
; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x_stack_yz:
; GCN: enable_vgpr_workitem_id = 2
; GCN-NOT: v0
; GCN-DAG: v_lshlrev_b32_e32 v1, 10, v1
@ -739,6 +746,8 @@ define void @too_many_args_use_workitem_id_x_stack_yz(
; GCN: s_mov_b32 s32, 0
; GCN: s_swappc_b64
; GCN: .amdhsa_system_vgpr_workitem_id 2
define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_stack_yz() #1 {
call void @too_many_args_use_workitem_id_x_stack_yz(
i32 10, i32 20, i32 30, i32 40,
@ -804,4 +813,4 @@ attributes #1 = { nounwind noinline }
attributes #2 = { nounwind "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 200}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}

View File

@ -10,8 +10,6 @@
; GCN-LABEL: {{^}}divergent_if_endif:
; VGPR: workitem_private_segment_byte_size = 16{{$}}
; GCN: {{^}}; %bb.0:
; GCN: s_mov_b32 m0, -1
@ -63,6 +61,8 @@
; GCN: s_or_b64 exec, exec, s[[[S_RELOAD_SAVEEXEC_LO]]:[[S_RELOAD_SAVEEXEC_HI]]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RELOAD_VAL]]
; VGPR: .amdhsa_private_segment_fixed_size 16
define amdgpu_kernel void @divergent_if_endif(ptr addrspace(1) %out) #0 {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@ -82,7 +82,6 @@ endif:
}
; GCN-LABEL: {{^}}divergent_loop:
; VGPR: workitem_private_segment_byte_size = 20{{$}}
; GCN: {{^}}; %bb.0:
; GCN-DAG: s_mov_b32 m0, -1
@ -133,6 +132,8 @@ endif:
; GCN: s_or_b64 exec, exec, s[[[S_RELOAD_SAVEEXEC_LO]]:[[S_RELOAD_SAVEEXEC_HI]]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[VAL_END]]
; VGPR: .amdhsa_private_segment_fixed_size 20
define amdgpu_kernel void @divergent_loop(ptr addrspace(1) %out) #0 {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@ -274,4 +275,4 @@ attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 200}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}

View File

@ -1,18 +1,15 @@
; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=+flat-for-global | FileCheck -check-prefix=HSA -check-prefix=HSA-DEFAULT -check-prefix=ALL %s
; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global | FileCheck -check-prefix=HSA -check-prefix=HSA-NODEFAULT -check-prefix=ALL %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-- -mcpu=tonga | FileCheck -check-prefix=HSA-NOADDR64 -check-prefix=ALL %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-- -mcpu=kaveri -mattr=-flat-for-global | FileCheck -check-prefix=NOHSA-DEFAULT -check-prefix=ALL %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-- -mcpu=kaveri -mattr=+flat-for-global | FileCheck -check-prefix=NOHSA-NODEFAULT -check-prefix=ALL %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-- -mcpu=tonga | FileCheck -check-prefix=NOHSA-NOADDR64 -check-prefix=ALL %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=+flat-for-global | FileCheck -check-prefix=HSA -check-prefix=HSA-DEFAULT -check-prefix=ALL %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global | FileCheck -check-prefix=HSA -check-prefix=HSA-NODEFAULT -check-prefix=ALL %s
; RUN: llc < %s -mtriple=amdgcn-- -mcpu=tonga | FileCheck -check-prefix=HSA-NOADDR64 -check-prefix=ALL %s
; RUN: llc < %s -mtriple=amdgcn-- -mcpu=kaveri -mattr=-flat-for-global | FileCheck -check-prefix=NOHSA-DEFAULT -check-prefix=ALL %s
; RUN: llc < %s -mtriple=amdgcn-- -mcpu=kaveri -mattr=+flat-for-global | FileCheck -check-prefix=NOHSA-NODEFAULT -check-prefix=ALL %s
; RUN: llc < %s -mtriple=amdgcn-- -mcpu=tonga | FileCheck -check-prefix=NOHSA-NOADDR64 -check-prefix=ALL %s
; There are no stack objects even though flat is used by default, so
; flat_scratch_init should be disabled.
; ALL-LABEL: {{^}}test:
; HSA: .amd_kernel_code_t
; HSA: enable_sgpr_flat_scratch_init = 0
; HSA: .end_amd_kernel_code_t
; ALL-NOT: flat_scr
@ -20,6 +17,8 @@
; HSA-NODEFAULT: buffer_store_dword
; HSA-NOADDR64: flat_store_dword
; HSA: .amdhsa_user_sgpr_flat_scratch_init 0
; NOHSA-DEFAULT: buffer_store_dword
; NOHSA-NODEFAULT: flat_store_dword
; NOHSA-NOADDR64: flat_store_dword
@ -53,4 +52,4 @@ entry:
}
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}

View File

@ -1,32 +1,26 @@
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn -mcpu=kaveri -verify-machineinstrs | FileCheck -check-prefix=CI -check-prefix=GCN %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn -mcpu=fiji -mattr=-xnack -verify-machineinstrs | FileCheck -check-prefix=VI-NOXNACK -check-prefix=GCN %s
; RUN: llc < %s -mtriple=amdgcn -mcpu=kaveri -verify-machineinstrs | FileCheck -check-prefix=CI -check-prefix=GCN %s
; RUN: llc < %s -mtriple=amdgcn -mcpu=fiji -mattr=-xnack -verify-machineinstrs | FileCheck -check-prefix=VI-NOXNACK -check-prefix=GCN %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn -mcpu=carrizo -mattr=-xnack -verify-machineinstrs | FileCheck -check-prefixes=VI-NOXNACK,GCN %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn -mcpu=stoney -mattr=-xnack -verify-machineinstrs | FileCheck -check-prefixes=VI-NOXNACK,GCN %s
; RUN: llc < %s -mtriple=amdgcn -mcpu=carrizo -mattr=-xnack -verify-machineinstrs | FileCheck -check-prefixes=VI-NOXNACK,GCN %s
; RUN: llc < %s -mtriple=amdgcn -mcpu=stoney -mattr=-xnack -verify-machineinstrs | FileCheck -check-prefixes=VI-NOXNACK,GCN %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn -mcpu=carrizo -mattr=+xnack -verify-machineinstrs | FileCheck -check-prefix=VI-XNACK -check-prefix=GCN %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn -mcpu=stoney -mattr=+xnack -verify-machineinstrs | FileCheck -check-prefix=VI-XNACK -check-prefix=GCN %s
; RUN: llc < %s -mtriple=amdgcn -mcpu=carrizo -mattr=+xnack -verify-machineinstrs | FileCheck -check-prefix=VI-XNACK -check-prefix=GCN %s
; RUN: llc < %s -mtriple=amdgcn -mcpu=stoney -mattr=+xnack -verify-machineinstrs | FileCheck -check-prefix=VI-XNACK -check-prefix=GCN %s
; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck -check-prefixes=CI,HSA-CI-V2,GCN %s
; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=carrizo -mattr=+xnack -verify-machineinstrs | FileCheck -check-prefixes=VI-XNACK,HSA-VI-XNACK-V2,GCN %s
; RUN: llc < %s -mtriple=amdgcn -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck -check-prefixes=GCN %s
; RUN: llc < %s -mtriple=amdgcn -mtriple=amdgcn--amdhsa -mcpu=carrizo -mattr=-xnack -verify-machineinstrs | FileCheck -check-prefixes=VI-NOXNACK,HSA-VI-NOXNACK,GCN %s
; RUN: llc < %s -mtriple=amdgcn -mtriple=amdgcn--amdhsa -mcpu=carrizo -mattr=+xnack -verify-machineinstrs | FileCheck -check-prefixes=VI-XNACK,HSA-VI-XNACK,GCN %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck -check-prefixes=GCN %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=carrizo -mattr=-xnack -verify-machineinstrs | FileCheck -check-prefixes=VI-NOXNACK,HSA-VI-NOXNACK,GCN %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=carrizo -mattr=+xnack -verify-machineinstrs | FileCheck -check-prefixes=VI-XNACK,HSA-VI-XNACK,GCN %s
; RUN: llc < %s -mtriple=amdgcn -mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=+architected-flat-scratch -verify-machineinstrs | FileCheck -check-prefixes=GCN %s
; RUN: llc < %s -mtriple=amdgcn -mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=+architected-flat-scratch,-xnack -verify-machineinstrs | FileCheck -check-prefixes=HSA-VI-NOXNACK,GFX9-ARCH-FLAT,GCN %s
; RUN: llc < %s -mtriple=amdgcn -mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=+architected-flat-scratch,+xnack -verify-machineinstrs | FileCheck -check-prefixes=HSA-VI-XNACK,GFX9-ARCH-FLAT,GCN %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=+architected-flat-scratch -verify-machineinstrs | FileCheck -check-prefixes=GCN %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=+architected-flat-scratch,-xnack -verify-machineinstrs | FileCheck -check-prefixes=HSA-VI-NOXNACK,GFX9-ARCH-FLAT,GCN %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=+architected-flat-scratch,+xnack -verify-machineinstrs | FileCheck -check-prefixes=HSA-VI-XNACK,GFX9-ARCH-FLAT,GCN %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=+architected-flat-scratch -verify-machineinstrs | FileCheck -check-prefixes=GCN %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=+architected-flat-scratch,-xnack -verify-machineinstrs | FileCheck -check-prefixes=HSA-VI-NOXNACK,GFX10-ARCH-FLAT,GCN %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=+architected-flat-scratch,+xnack -verify-machineinstrs | FileCheck -check-prefixes=HSA-VI-XNACK,GFX10-ARCH-FLAT,GCN %s
; RUN: llc < %s -mtriple=amdgcn -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=+architected-flat-scratch -verify-machineinstrs | FileCheck -check-prefixes=GCN %s
; RUN: llc < %s -mtriple=amdgcn -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=+architected-flat-scratch,-xnack -verify-machineinstrs | FileCheck -check-prefixes=HSA-VI-NOXNACK,GFX10-ARCH-FLAT,GCN %s
; RUN: llc < %s -mtriple=amdgcn -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=+architected-flat-scratch,+xnack -verify-machineinstrs | FileCheck -check-prefixes=HSA-VI-XNACK,GFX10-ARCH-FLAT,GCN %s
; GCN-LABEL: {{^}}no_vcc_no_flat:
; HSA-CI-V2: is_xnack_enabled = 0
; HSA-VI-XNACK-V2: is_xnack_enabled = 1
; NOT-HSA-CI: .amdhsa_reserve_xnack_mask
; HSA-VI-NOXNACK: .amdhsa_reserve_xnack_mask 0
; HSA-VI-XNACK: .amdhsa_reserve_xnack_mask 1
@ -44,9 +38,6 @@ entry:
; GCN-LABEL: {{^}}vcc_no_flat:
; HSA-CI-V2: is_xnack_enabled = 0
; HSA-VI-XNACK-V2: is_xnack_enabled = 1
; NOT-HSA-CI: .amdhsa_reserve_xnack_mask
; HSA-VI-NOXNACK: .amdhsa_reserve_xnack_mask 0
; HSA-VI-XNACK: .amdhsa_reserve_xnack_mask 1
@ -64,9 +55,6 @@ entry:
; GCN-LABEL: {{^}}no_vcc_flat:
; HSA-CI-V2: is_xnack_enabled = 0
; HSA-VI-XNACK-V2: is_xnack_enabled = 1
; NOT-HSA-CI: .amdhsa_reserve_xnack_mask
; HSA-VI-NOXNACK: .amdhsa_reserve_xnack_mask 0
; HSA-VI-XNACK: .amdhsa_reserve_xnack_mask 1
@ -84,9 +72,6 @@ entry:
; GCN-LABEL: {{^}}vcc_flat:
; HSA-CI-V2: is_xnack_enabled = 0
; HSA-VI-XNACK-V2: is_xnack_enabled = 1
; NOT-HSA-CI: .amdhsa_reserve_xnack_mask
; HSA-VI-NOXNACK: .amdhsa_reserve_xnack_mask 0
; HSA-VI-XNACK: .amdhsa_reserve_xnack_mask 1
@ -107,9 +92,6 @@ entry:
; GCN-LABEL: {{^}}use_flat_scr:
; HSA-CI-V2: is_xnack_enabled = 0
; HSA-VI-XNACK-V2: is_xnack_enabled = 1
; NOT-HSA-CI: .amdhsa_reserve_xnack_mask
; HSA-VI-NOXNACK: .amdhsa_reserve_xnack_mask 0
; HSA-VI-XNACK: .amdhsa_reserve_xnack_mask 1
@ -127,9 +109,6 @@ entry:
; GCN-LABEL: {{^}}use_flat_scr_lo:
; HSA-CI-V2: is_xnack_enabled = 0
; HSA-VI-XNACK-V2: is_xnack_enabled = 1
; NOT-HSA-CI: .amdhsa_reserve_xnack_mask
; HSA-VI-NOXNACK: .amdhsa_reserve_xnack_mask 0
; HSA-VI-XNACK: .amdhsa_reserve_xnack_mask 1
@ -147,9 +126,6 @@ entry:
; GCN-LABEL: {{^}}use_flat_scr_hi:
; HSA-CI-V2: is_xnack_enabled = 0
; HSA-VI-XNACK-V2: is_xnack_enabled = 1
; NOT-HSA-CI: .amdhsa_reserve_xnack_mask
; HSA-VI-NOXNACK: .amdhsa_reserve_xnack_mask 0
; HSA-VI-XNACK: .amdhsa_reserve_xnack_mask 1
@ -168,4 +144,4 @@ entry:
attributes #0 = { nounwind }
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}

View File

@ -1,11 +1,10 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx902 -mattr=-xnack < %s | FileCheck %s
; CHECK: .hsa_code_object_isa 9,0,2,"AMD","AMDGPU"
; CHECK: .amdgcn_target "amdgcn-amd-amdhsa--gfx902:xnack-"
define amdgpu_kernel void @test_kernel(ptr addrspace(1) %out0, ptr addrspace(1) %out1) nounwind {
store float 0.0, ptr addrspace(1) %out0
ret void
}
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 200}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}

View File

@ -3,11 +3,11 @@
; Make sure that with an HSA triple, we don't default to an
; unsupported device.
; CHECK: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
; CHECK: .amdgcn_target "amdgcn-unknown-amdhsa--gfx700"
define amdgpu_kernel void @test_kernel(ptr addrspace(1) %out0, ptr addrspace(1) %out1) nounwind {
store float 0.0, ptr addrspace(1) %out0
ret void
}
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 200}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}

View File

@ -1,9 +1,9 @@
; RUN: llc -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; GCN-LABEL: {{^}}test_default_ci:
; GCN: float_mode = 240
; GCN: enable_dx10_clamp = 1
; GCN: enable_ieee_mode = 1
; GCN: .amdhsa_dx10_clamp 1
; GCN: .amdhsa_ieee_mode 1
; GCN: FloatMode: 240
define amdgpu_kernel void @test_default_ci(ptr addrspace(1) %out0, ptr addrspace(1) %out1) #0 {
store float 0.0, ptr addrspace(1) %out0
store double 0.0, ptr addrspace(1) %out1
@ -11,9 +11,9 @@ define amdgpu_kernel void @test_default_ci(ptr addrspace(1) %out0, ptr addrspace
}
; GCN-LABEL: {{^}}test_default_vi:
; GCN: float_mode = 240
; GCN: enable_dx10_clamp = 1
; GCN: enable_ieee_mode = 1
; GCN: .amdhsa_dx10_clamp 1
; GCN: .amdhsa_ieee_mode 1
; GCN: FloatMode: 240
define amdgpu_kernel void @test_default_vi(ptr addrspace(1) %out0, ptr addrspace(1) %out1) #1 {
store float 0.0, ptr addrspace(1) %out0
store double 0.0, ptr addrspace(1) %out1
@ -21,9 +21,9 @@ define amdgpu_kernel void @test_default_vi(ptr addrspace(1) %out0, ptr addrspace
}
; GCN-LABEL: {{^}}test_f64_denormals:
; GCN: float_mode = 192
; GCN: enable_dx10_clamp = 1
; GCN: enable_ieee_mode = 1
; GCN: .amdhsa_dx10_clamp 1
; GCN: .amdhsa_ieee_mode 1
; GCN: FloatMode: 192
define amdgpu_kernel void @test_f64_denormals(ptr addrspace(1) %out0, ptr addrspace(1) %out1) #2 {
store float 0.0, ptr addrspace(1) %out0
store double 0.0, ptr addrspace(1) %out1
@ -31,9 +31,9 @@ define amdgpu_kernel void @test_f64_denormals(ptr addrspace(1) %out0, ptr addrsp
}
; GCN-LABEL: {{^}}test_f32_denormals:
; GCN: float_mode = 48
; GCN: enable_dx10_clamp = 1
; GCN: enable_ieee_mode = 1
; GCN: .amdhsa_dx10_clamp 1
; GCN: .amdhsa_ieee_mode 1
; GCN: FloatMode: 48
define amdgpu_kernel void @test_f32_denormals(ptr addrspace(1) %out0, ptr addrspace(1) %out1) #3 {
store float 0.0, ptr addrspace(1) %out0
store double 0.0, ptr addrspace(1) %out1
@ -41,9 +41,9 @@ define amdgpu_kernel void @test_f32_denormals(ptr addrspace(1) %out0, ptr addrsp
}
; GCN-LABEL: {{^}}test_f32_f64_denormals:
; GCN: float_mode = 240
; GCN: enable_dx10_clamp = 1
; GCN: enable_ieee_mode = 1
; GCN: .amdhsa_dx10_clamp 1
; GCN: .amdhsa_ieee_mode 1
; GCN: FloatMode: 240
define amdgpu_kernel void @test_f32_f64_denormals(ptr addrspace(1) %out0, ptr addrspace(1) %out1) #4 {
store float 0.0, ptr addrspace(1) %out0
store double 0.0, ptr addrspace(1) %out1
@ -51,9 +51,9 @@ define amdgpu_kernel void @test_f32_f64_denormals(ptr addrspace(1) %out0, ptr ad
}
; GCN-LABEL: {{^}}test_no_denormals:
; GCN: float_mode = 0
; GCN: enable_dx10_clamp = 1
; GCN: enable_ieee_mode = 1
; GCN: .amdhsa_dx10_clamp 1
; GCN: .amdhsa_ieee_mode 1
; GCN: FloatMode: 0
define amdgpu_kernel void @test_no_denormals(ptr addrspace(1) %out0, ptr addrspace(1) %out1) #5 {
store float 0.0, ptr addrspace(1) %out0
store double 0.0, ptr addrspace(1) %out1
@ -61,9 +61,9 @@ define amdgpu_kernel void @test_no_denormals(ptr addrspace(1) %out0, ptr addrspa
}
; GCN-LABEL: {{^}}test_no_dx10_clamp_vi:
; GCN: float_mode = 240
; GCN: enable_dx10_clamp = 0
; GCN: enable_ieee_mode = 1
; GCN: .amdhsa_dx10_clamp 0
; GCN: .amdhsa_ieee_mode 1
; GCN: FloatMode: 240
define amdgpu_kernel void @test_no_dx10_clamp_vi(ptr addrspace(1) %out0, ptr addrspace(1) %out1) #6 {
store float 0.0, ptr addrspace(1) %out0
store double 0.0, ptr addrspace(1) %out1
@ -71,9 +71,9 @@ define amdgpu_kernel void @test_no_dx10_clamp_vi(ptr addrspace(1) %out0, ptr add
}
; GCN-LABEL: {{^}}test_no_ieee_mode_vi:
; GCN: float_mode = 240
; GCN: enable_dx10_clamp = 1
; GCN: enable_ieee_mode = 0
; GCN: .amdhsa_dx10_clamp 1
; GCN: .amdhsa_ieee_mode 0
; GCN: FloatMode: 240
define amdgpu_kernel void @test_no_ieee_mode_vi(ptr addrspace(1) %out0, ptr addrspace(1) %out1) #7 {
store float 0.0, ptr addrspace(1) %out0
store double 0.0, ptr addrspace(1) %out1
@ -81,9 +81,9 @@ define amdgpu_kernel void @test_no_ieee_mode_vi(ptr addrspace(1) %out0, ptr addr
}
; GCN-LABEL: {{^}}test_no_ieee_mode_no_dx10_clamp_vi:
; GCN: float_mode = 240
; GCN: enable_dx10_clamp = 0
; GCN: enable_ieee_mode = 0
; GCN: .amdhsa_dx10_clamp 0
; GCN: .amdhsa_ieee_mode 0
; GCN: FloatMode: 240
define amdgpu_kernel void @test_no_ieee_mode_no_dx10_clamp_vi(ptr addrspace(1) %out0, ptr addrspace(1) %out1) #8 {
store float 0.0, ptr addrspace(1) %out0
store double 0.0, ptr addrspace(1) %out1
@ -101,4 +101,4 @@ attributes #7 = { nounwind "amdgpu-ieee"="false" "target-cpu"="fiji" }
attributes #8 = { nounwind "amdgpu-dx10-clamp"="false" "amdgpu-ieee"="false" "target-cpu"="fiji" }
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 200}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}

View File

@ -3,7 +3,7 @@
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo | FileCheck --check-prefix=HSA %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo | FileCheck --check-prefix=HSA-VI %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri -filetype=obj | llvm-readobj --symbols -S --sd - | FileCheck --check-prefix=ELF %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | llvm-mc -filetype=obj -triple amdgcn--amdhsa --amdhsa-code-object-version=2 -mcpu=kaveri | llvm-readobj --symbols -S --sd - | FileCheck %s --check-prefix=ELF
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | llvm-mc -filetype=obj -triple amdgcn--amdhsa --amdhsa-code-object-version=4 -mcpu=kaveri | llvm-readobj --symbols -S --sd - | FileCheck %s --check-prefix=ELF
; The SHT_NOTE section contains the output from the .hsa_code_object_*
; directives.
@ -18,12 +18,13 @@
; ELF: }
; ELF: SHT_NOTE
; ELF: 0000: 04000000 08000000 01000000 414D4400
; ELF: 0010: 02000000 01000000 04000000 1B000000
; ELF: 0020: 03000000 414D4400 04000700 07000000
; ELF: 0030: 00000000 00000000 414D4400 414D4447
; ELF: 0040: 50550000
; ELF: 0000: 07000000 4F000000 20000000 414D4447
; ELF: 0010: 50550000 83AE616D 64687361 2E6B6572
; ELF: 0020: 6E656C73 90AD616D 64687361 2E746172
; ELF: 0030: 676574BD 616D6467 636E2D75 6E6B6E6F
; ELF: 0040: 776E2D61 6D646873 612D2D67 66783730
; ELF: 0050: 30AE616D 64687361 2E766572 73696F6E
; ELF: 0060: 92010100
; ELF: Symbol {
; ELF: Name: simple
@ -32,9 +33,8 @@
; ELF: }
; HSA: .text
; HSA: .hsa_code_object_version 2,1
; HSA-CI: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
; HSA-VI: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU"
; HSA-CI: .amdgcn_target "amdgcn-unknown-amdhsa--gfx700"
; HSA-VI: .amdgcn_target "amdgcn-unknown-amdhsa--gfx801"
; HSA-NOT: .amdgpu_hsa_kernel simple
; HSA: .globl simple
@ -69,4 +69,4 @@ entry:
}
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 200}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}

View File

@ -1,35 +0,0 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s
; CHECK: - Name: test_ro_arg
; CHECK-NEXT: SymbolName: 'test_ro_arg@kd'
; CHECK-NEXT: Args:
; CHECK-NEXT: - Name: in
; CHECK-NEXT: TypeName: 'float*'
; CHECK-NEXT: Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: GlobalBuffer
; CHECK-NEXT: AddrSpaceQual: Global
; CHECK-NEXT: AccQual: ReadOnly
; CHECK-NEXT: IsConst: true
; CHECK-NEXT: IsRestrict: true
; CHECK-NEXT: - Name: out
; CHECK-NEXT: TypeName: 'float*'
; CHECK-NEXT: Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: GlobalBuffer
; CHECK-NEXT: AddrSpaceQual: Global
; CHECK-NEXT: AccQual: Default
define amdgpu_kernel void @test_ro_arg(ptr addrspace(1) noalias readonly %in, ptr addrspace(1) %out)
!kernel_arg_addr_space !0 !kernel_arg_access_qual !1 !kernel_arg_type !2
!kernel_arg_base_type !2 !kernel_arg_type_qual !3 {
ret void
}
!0 = !{i32 1, i32 1}
!1 = !{!"none", !"none"}
!2 = !{!"float*", !"float*"}
!3 = !{!"const restrict", !""}
!llvm.module.flags = !{!99}
!99 = !{i32 1, !"amdgpu_code_object_version", i32 200}

View File

@ -1,31 +0,0 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s
; CHECK: - .args:
; CHECK-NEXT: - .actual_access: read_only
; CHECK-NEXT: .address_space: global
; CHECK-NEXT: .is_const: true
; CHECK-NEXT: .is_restrict: true
; CHECK-NEXT: .name: in
; CHECK-NEXT: .offset: 0
; CHECK-NEXT: .size: 8
; CHECK-NEXT: .type_name: 'float*'
; CHECK-NEXT: .value_kind: global_buffer
; CHECK-NEXT: - .address_space: global
; CHECK-NEXT: .name: out
; CHECK-NEXT: .offset: 8
; CHECK-NEXT: .size: 8
; CHECK-NEXT: .type_name: 'float*'
; CHECK-NEXT: .value_kind: global_buffer
; CHECK: .name: test_ro_arg
; CHECK: .symbol: test_ro_arg.kd
define amdgpu_kernel void @test_ro_arg(ptr addrspace(1) noalias readonly %in, ptr addrspace(1) %out)
!kernel_arg_addr_space !0 !kernel_arg_access_qual !1 !kernel_arg_type !2
!kernel_arg_base_type !2 !kernel_arg_type_qual !3 {
ret void
}
!0 = !{i32 1, i32 1}
!1 = !{!"none", !"none"}
!2 = !{!"float*", !"float*"}
!3 = !{!"const restrict", !""}

View File

@ -1,92 +0,0 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
; CHECK: ---
; CHECK: Version: [ 1, 0 ]
; CHECK-NOT: Printf:
; CHECK: Kernels:
; CHECK: - Name: test_non_enqueue_kernel_caller
; CHECK-NEXT: SymbolName: 'test_non_enqueue_kernel_caller@kd'
; CHECK-NEXT: Language: OpenCL C
; CHECK-NEXT: LanguageVersion: [ 2, 0 ]
; CHECK-NEXT: Args:
; CHECK-NEXT: - Name: a
; CHECK-NEXT: TypeName: char
; CHECK-NEXT: Size: 1
; CHECK-NEXT: Align: 1
; CHECK-NEXT: ValueKind: ByValue
; CHECK-NEXT: AccQual: Default
; CHECK-NEXT: - Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX
; CHECK-NEXT: - Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY
; CHECK-NEXT: - Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ
; CHECK-NEXT: - Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: HiddenHostcallBuffer
; CHECK-NOT: ValueKind: HiddenDefaultQueue
; CHECK-NOT: ValueKind: HiddenCompletionAction
define amdgpu_kernel void @test_non_enqueue_kernel_caller(i8 %a) #0
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
!kernel_arg_base_type !3 !kernel_arg_type_qual !4 {
ret void
}
; CHECK: - Name: test_enqueue_kernel_caller
; CHECK-NEXT: SymbolName: 'test_enqueue_kernel_caller@kd'
; CHECK-NEXT: Language: OpenCL C
; CHECK-NEXT: LanguageVersion: [ 2, 0 ]
; CHECK-NEXT: Args:
; CHECK-NEXT: - Name: a
; CHECK-NEXT: TypeName: char
; CHECK-NEXT: Size: 1
; CHECK-NEXT: Align: 1
; CHECK-NEXT: ValueKind: ByValue
; CHECK-NEXT: AccQual: Default
; CHECK-NEXT: - Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX
; CHECK-NEXT: - Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY
; CHECK-NEXT: - Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ
; CHECK-NEXT: - Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: HiddenHostcallBuffer
; CHECK-NEXT: AddrSpaceQual: Global
; CHECK-NEXT: - Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: HiddenDefaultQueue
; CHECK-NEXT: AddrSpaceQual: Global
; CHECK-NEXT: - Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: HiddenCompletionAction
; CHECK-NEXT: AddrSpaceQual: Global
define amdgpu_kernel void @test_enqueue_kernel_caller(i8 %a) #1
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
!kernel_arg_base_type !3 !kernel_arg_type_qual !4 {
ret void
}
attributes #0 = { optnone noinline "amdgpu-no-default-queue" "amdgpu-no-completion-action" "amdgpu-implicitarg-num-bytes"="48" }
attributes #1 = { optnone noinline "amdgpu-implicitarg-num-bytes"="48" }
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 200}
!1 = !{i32 0}
!2 = !{!"none"}
!3 = !{!"char"}
!4 = !{!""}
!opencl.ocl.version = !{!90}
!90 = !{i32 2, i32 0}
; PARSER: AMDGPU HSA Metadata Parser Test: PASS

File diff suppressed because it is too large Load Diff

View File

@ -1,313 +0,0 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s
; CHECK: ---
; CHECK: Version: [ 1, 0 ]
; CHECK: Kernels:
; CHECK: - Name: test0
; CHECK: SymbolName: 'test0@kd'
; CHECK: Args:
; CHECK-NEXT: - Name: r
; CHECK-NEXT: Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: GlobalBuffer
; CHECK-NEXT: AddrSpaceQual: Global
; CHECK-NEXT: - Name: a
; CHECK-NEXT: Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: GlobalBuffer
; CHECK-NEXT: AddrSpaceQual: Global
; CHECK-NEXT: - Name: b
; CHECK-NEXT: Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: GlobalBuffer
; CHECK-NEXT: AddrSpaceQual: Global
; CHECK-NEXT: CodeProps:
define amdgpu_kernel void @test0(
ptr addrspace(1) %r,
ptr addrspace(1) %a,
ptr addrspace(1) %b) {
entry:
%a.val = load half, ptr addrspace(1) %a
%b.val = load half, ptr addrspace(1) %b
%r.val = fadd half %a.val, %b.val
store half %r.val, ptr addrspace(1) %r
ret void
}
; CHECK: - Name: test8
; CHECK: SymbolName: 'test8@kd'
; CHECK: Args:
; CHECK-NEXT: - Name: r
; CHECK-NEXT: Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: GlobalBuffer
; CHECK-NEXT: AddrSpaceQual: Global
; CHECK-NEXT: - Name: a
; CHECK-NEXT: Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: GlobalBuffer
; CHECK-NEXT: AddrSpaceQual: Global
; CHECK-NEXT: - Name: b
; CHECK-NEXT: Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: GlobalBuffer
; CHECK-NEXT: AddrSpaceQual: Global
; CHECK-NEXT: - Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX
; CHECK-NEXT: CodeProps:
define amdgpu_kernel void @test8(
ptr addrspace(1) %r,
ptr addrspace(1) %a,
ptr addrspace(1) %b) #0 {
entry:
%a.val = load half, ptr addrspace(1) %a
%b.val = load half, ptr addrspace(1) %b
%r.val = fadd half %a.val, %b.val
store half %r.val, ptr addrspace(1) %r
ret void
}
; CHECK: - Name: test16
; CHECK: SymbolName: 'test16@kd'
; CHECK: Args:
; CHECK-NEXT: - Name: r
; CHECK-NEXT: Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: GlobalBuffer
; CHECK-NEXT: AddrSpaceQual: Global
; CHECK-NEXT: - Name: a
; CHECK-NEXT: Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: GlobalBuffer
; CHECK-NEXT: AddrSpaceQual: Global
; CHECK-NEXT: - Name: b
; CHECK-NEXT: Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: GlobalBuffer
; CHECK-NEXT: AddrSpaceQual: Global
; CHECK-NEXT: - Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX
; CHECK-NEXT: - Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY
; CHECK-NEXT: CodeProps:
define amdgpu_kernel void @test16(
ptr addrspace(1) %r,
ptr addrspace(1) %a,
ptr addrspace(1) %b) #1 {
entry:
%a.val = load half, ptr addrspace(1) %a
%b.val = load half, ptr addrspace(1) %b
%r.val = fadd half %a.val, %b.val
store half %r.val, ptr addrspace(1) %r
ret void
}
; CHECK: - Name: test24
; CHECK: SymbolName: 'test24@kd'
; CHECK: Args:
; CHECK-NEXT: - Name: r
; CHECK-NEXT: Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: GlobalBuffer
; CHECK-NEXT: AddrSpaceQual: Global
; CHECK-NEXT: - Name: a
; CHECK-NEXT: Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: GlobalBuffer
; CHECK-NEXT: AddrSpaceQual: Global
; CHECK-NEXT: - Name: b
; CHECK-NEXT: Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: GlobalBuffer
; CHECK-NEXT: AddrSpaceQual: Global
; CHECK-NEXT: - Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX
; CHECK-NEXT: - Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY
; CHECK-NEXT: - Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ
; CHECK-NEXT: CodeProps:
define amdgpu_kernel void @test24(
ptr addrspace(1) %r,
ptr addrspace(1) %a,
ptr addrspace(1) %b) #2 {
entry:
%a.val = load half, ptr addrspace(1) %a
%b.val = load half, ptr addrspace(1) %b
%r.val = fadd half %a.val, %b.val
store half %r.val, ptr addrspace(1) %r
ret void
}
; CHECK: - Name: test32
; CHECK: SymbolName: 'test32@kd'
; CHECK: Args:
; CHECK-NEXT: - Name: r
; CHECK-NEXT: Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: GlobalBuffer
; CHECK-NEXT: AddrSpaceQual: Global
; CHECK-NEXT: - Name: a
; CHECK-NEXT: Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: GlobalBuffer
; CHECK-NEXT: AddrSpaceQual: Global
; CHECK-NEXT: - Name: b
; CHECK-NEXT: Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: GlobalBuffer
; CHECK-NEXT: AddrSpaceQual: Global
; CHECK-NEXT: - Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX
; CHECK-NEXT: - Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY
; CHECK-NEXT: - Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ
; CHECK-NEXT: - Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: HiddenHostcallBuffer
; CHECK-NEXT: AddrSpaceQual: Global
; CHECK-NEXT: CodeProps:
define amdgpu_kernel void @test32(
ptr addrspace(1) %r,
ptr addrspace(1) %a,
ptr addrspace(1) %b) #3 {
entry:
%a.val = load half, ptr addrspace(1) %a
%b.val = load half, ptr addrspace(1) %b
%r.val = fadd half %a.val, %b.val
store half %r.val, ptr addrspace(1) %r
ret void
}
; CHECK: - Name: test48
; CHECK: SymbolName: 'test48@kd'
; CHECK: Args:
; CHECK-NEXT: - Name: r
; CHECK-NEXT: Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: GlobalBuffer
; CHECK-NEXT: AddrSpaceQual: Global
; CHECK-NEXT: - Name: a
; CHECK-NEXT: Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: GlobalBuffer
; CHECK-NEXT: AddrSpaceQual: Global
; CHECK-NEXT: - Name: b
; CHECK-NEXT: Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: GlobalBuffer
; CHECK-NEXT: AddrSpaceQual: Global
; CHECK-NEXT: - Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX
; CHECK-NEXT: - Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY
; CHECK-NEXT: - Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ
; CHECK-NEXT: - Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: HiddenHostcallBuffer
; CHECK-NEXT: AddrSpaceQual: Global
; CHECK-NEXT: - Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: HiddenDefaultQueue
; CHECK-NEXT: AddrSpaceQual: Global
; CHECK-NEXT: - Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: HiddenCompletionAction
; CHECK-NEXT: AddrSpaceQual: Global
; CHECK-NEXT: CodeProps:
define amdgpu_kernel void @test48(
ptr addrspace(1) %r,
ptr addrspace(1) %a,
ptr addrspace(1) %b) #4 {
entry:
%a.val = load half, ptr addrspace(1) %a
%b.val = load half, ptr addrspace(1) %b
%r.val = fadd half %a.val, %b.val
store half %r.val, ptr addrspace(1) %r
ret void
}
; CHECK: - Name: test56
; CHECK: SymbolName: 'test56@kd'
; CHECK: Args:
; CHECK-NEXT: - Name: r
; CHECK-NEXT: Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: GlobalBuffer
; CHECK-NEXT: AddrSpaceQual: Global
; CHECK-NEXT: - Name: a
; CHECK-NEXT: Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: GlobalBuffer
; CHECK-NEXT: AddrSpaceQual: Global
; CHECK-NEXT: - Name: b
; CHECK-NEXT: Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: GlobalBuffer
; CHECK-NEXT: AddrSpaceQual: Global
; CHECK-NEXT: - Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX
; CHECK-NEXT: - Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY
; CHECK-NEXT: - Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ
; CHECK-NEXT: - Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: HiddenHostcallBuffer
; CHECK-NEXT: AddrSpaceQual: Global
; CHECK-NEXT: - Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: HiddenDefaultQueue
; CHECK-NEXT: AddrSpaceQual: Global
; CHECK-NEXT: - Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: HiddenCompletionAction
; CHECK-NEXT: AddrSpaceQual: Global
; CHECK-NEXT: - Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg
; CHECK-NEXT: AddrSpaceQual: Global
; CHECK-NEXT: CodeProps:
define amdgpu_kernel void @test56(
ptr addrspace(1) %r,
ptr addrspace(1) %a,
ptr addrspace(1) %b) #5 {
entry:
%a.val = load half, ptr addrspace(1) %a
%b.val = load half, ptr addrspace(1) %b
%r.val = fadd half %a.val, %b.val
store half %r.val, ptr addrspace(1) %r
ret void
}
; We don't have a use of llvm.amdgcn.implicitarg.ptr, so optnone to
; avoid optimizing out the implicit argument allocation.
attributes #0 = { optnone noinline "amdgpu-implicitarg-num-bytes"="8" }
attributes #1 = { optnone noinline "amdgpu-implicitarg-num-bytes"="16" }
attributes #2 = { optnone noinline "amdgpu-implicitarg-num-bytes"="24" }
attributes #3 = { optnone noinline "amdgpu-implicitarg-num-bytes"="32" }
attributes #4 = { optnone noinline "amdgpu-implicitarg-num-bytes"="48" }
attributes #5 = { optnone noinline "amdgpu-implicitarg-num-bytes"="56" }
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 200}

View File

@ -1,95 +0,0 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s
%opencl.image1d_t = type opaque
%opencl.image1d_array_t = type opaque
%opencl.image1d_buffer_t = type opaque
%opencl.image2d_t = type opaque
%opencl.image2d_array_t = type opaque
%opencl.image2d_array_depth_t = type opaque
%opencl.image2d_array_msaa_t = type opaque
%opencl.image2d_array_msaa_depth_t = type opaque
%opencl.image2d_depth_t = type opaque
%opencl.image2d_msaa_t = type opaque
%opencl.image2d_msaa_depth_t = type opaque
%opencl.image3d_t = type opaque
; CHECK: ---
; CHECK: Version: [ 1, 0 ]
; CHECK: Kernels:
; CHECK: - Name: test
; CHECK: SymbolName: 'test@kd'
; CHECK: Args:
; CHECK: - Name: a
; CHECK: TypeName: image1d_t
; CHECK: Size: 8
; CHECK: ValueKind: Image
; CHECK: - Name: b
; CHECK: TypeName: image1d_array_t
; CHECK: Size: 8
; CHECK: ValueKind: Image
; CHECK: - Name: c
; CHECK: TypeName: image1d_buffer_t
; CHECK: Size: 8
; CHECK: ValueKind: Image
; CHECK: - Name: d
; CHECK: TypeName: image2d_t
; CHECK: Size: 8
; CHECK: ValueKind: Image
; CHECK: - Name: e
; CHECK: TypeName: image2d_array_t
; CHECK: Size: 8
; CHECK: ValueKind: Image
; CHECK: - Name: f
; CHECK: TypeName: image2d_array_depth_t
; CHECK: Size: 8
; CHECK: ValueKind: Image
; CHECK: - Name: g
; CHECK: TypeName: image2d_array_msaa_t
; CHECK: Size: 8
; CHECK: ValueKind: Image
; CHECK: - Name: h
; CHECK: TypeName: image2d_array_msaa_depth_t
; CHECK: Size: 8
; CHECK: ValueKind: Image
; CHECK: - Name: i
; CHECK: TypeName: image2d_depth_t
; CHECK: Size: 8
; CHECK: ValueKind: Image
; CHECK: - Name: j
; CHECK: TypeName: image2d_msaa_t
; CHECK: Size: 8
; CHECK: ValueKind: Image
; CHECK: - Name: k
; CHECK: TypeName: image2d_msaa_depth_t
; CHECK: Size: 8
; CHECK: ValueKind: Image
; CHECK: - Name: l
; CHECK: TypeName: image3d_t
; CHECK: Size: 8
; CHECK: ValueKind: Image
define amdgpu_kernel void @test(ptr addrspace(1) %a,
ptr addrspace(1) %b,
ptr addrspace(1) %c,
ptr addrspace(1) %d,
ptr addrspace(1) %e,
ptr addrspace(1) %f,
ptr addrspace(1) %g,
ptr addrspace(1) %h,
ptr addrspace(1) %i,
ptr addrspace(1) %j,
ptr addrspace(1) %k,
ptr addrspace(1) %l)
!kernel_arg_type !1 !kernel_arg_base_type !1 {
ret void
}
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 200}
!1 = !{!"image1d_t", !"image1d_array_t", !"image1d_buffer_t",
!"image2d_t", !"image2d_array_t", !"image2d_array_depth_t",
!"image2d_array_msaa_t", !"image2d_array_msaa_depth_t",
!"image2d_depth_t", !"image2d_msaa_t", !"image2d_msaa_depth_t",
!"image3d_t"}

View File

@ -1,11 +0,0 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s
; Make sure llc does not crash for invalid opencl version metadata.
; CHECK: ---
; CHECK: Version: [ 1, 0 ]
; CHECK: ...
!opencl.ocl.version = !{}
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 200}

View File

@ -1,14 +0,0 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s
; Make sure llc does not crash for invalid opencl version metadata.
; CHECK: ---
; CHECK: amdhsa.version:
; CHECK-NEXT: - 1
; CHECK-NEXT: - 0
; CHECK: ...
!opencl.ocl.version = !{!0}
!llvm.module.flags = !{!1}
!0 = !{}
!1 = !{i32 1, !"amdgpu_code_object_version", i32 300}

View File

@ -1,12 +0,0 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s
; Make sure llc does not crash for invalid opencl version metadata.
; CHECK: ---
; CHECK: Version: [ 1, 0 ]
; CHECK: ...
!opencl.ocl.version = !{!0}
!llvm.module.flags = !{!1}
!0 = !{}
!1 = !{i32 1, !"amdgpu_code_object_version", i32 200}

View File

@ -1,12 +0,0 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s
; Make sure llc does not crash for invalid opencl version metadata.
; CHECK: ---
; CHECK: Version: [ 1, 0 ]
; CHECK: ...
!opencl.ocl.version = !{!0}
!llvm.module.flags = !{!1}
!0 = !{i32 1}
!1 = !{i32 1, !"amdgpu_code_object_version", i32 200}

View File

@ -1,173 +0,0 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -enable-misched=0 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefixes=CHECK,GFX700 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -mattr=-xnack -enable-misched=0 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefixes=CHECK,GFX803 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-xnack -enable-misched=0 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefixes=CHECK,GFX900 %s
@var = addrspace(1) global float 0.0
; CHECK: ---
; CHECK: Version: [ 1, 0 ]
; CHECK: Kernels:
; CHECK-LABEL: - Name: test
; CHECK: SymbolName: 'test@kd'
; CHECK: CodeProps:
; CHECK: KernargSegmentSize: 24
; CHECK: GroupSegmentFixedSize: 0
; CHECK: PrivateSegmentFixedSize: 0
; CHECK: KernargSegmentAlign: 8
; CHECK: WavefrontSize: 64
; CHECK: NumSGPRs: 6
; CHECK: NumVGPRs: {{3|6}}
; CHECK: MaxFlatWorkGroupSize: 1024
define amdgpu_kernel void @test(
ptr addrspace(1) %r,
ptr addrspace(1) %a,
ptr addrspace(1) %b) {
entry:
%a.val = load half, ptr addrspace(1) %a
%b.val = load half, ptr addrspace(1) %b
%r.val = fadd half %a.val, %b.val
store half %r.val, ptr addrspace(1) %r
ret void
}
; CHECK-LABEL: - Name: test_max_flat_workgroup_size
; CHECK: SymbolName: 'test_max_flat_workgroup_size@kd'
; CHECK: CodeProps:
; CHECK: KernargSegmentSize: 24
; CHECK: GroupSegmentFixedSize: 0
; CHECK: PrivateSegmentFixedSize: 0
; CHECK: KernargSegmentAlign: 8
; CHECK: WavefrontSize: 64
; CHECK: NumSGPRs: 6
; CHECK: NumVGPRs: {{3|6}}
; CHECK: MaxFlatWorkGroupSize: 256
define amdgpu_kernel void @test_max_flat_workgroup_size(
ptr addrspace(1) %r,
ptr addrspace(1) %a,
ptr addrspace(1) %b) #2 {
entry:
%a.val = load half, ptr addrspace(1) %a
%b.val = load half, ptr addrspace(1) %b
%r.val = fadd half %a.val, %b.val
store half %r.val, ptr addrspace(1) %r
ret void
}
; CHECK-LABEL: - Name: num_spilled_sgprs
; CHECK: SymbolName: 'num_spilled_sgprs@kd'
; CHECK: CodeProps:
; GFX700: NumSpilledSGPRs: 38
; GFX803: NumSpilledSGPRs: 22
; GFX900: NumSpilledSGPRs: {{22|48}}
define amdgpu_kernel void @num_spilled_sgprs(
ptr addrspace(1) %out0, ptr addrspace(1) %out1, [8 x i32],
ptr addrspace(1) %out2, ptr addrspace(1) %out3, [8 x i32],
ptr addrspace(1) %out4, ptr addrspace(1) %out5, [8 x i32],
ptr addrspace(1) %out6, ptr addrspace(1) %out7, [8 x i32],
ptr addrspace(1) %out8, ptr addrspace(1) %out9, [8 x i32],
ptr addrspace(1) %outa, ptr addrspace(1) %outb, [8 x i32],
ptr addrspace(1) %outc, ptr addrspace(1) %outd, [8 x i32],
ptr addrspace(1) %oute, ptr addrspace(1) %outf, [8 x i32],
i32 %in0, i32 %in1, i32 %in2, i32 %in3, [8 x i32],
i32 %in4, i32 %in5, i32 %in6, i32 %in7, [8 x i32],
i32 %in8, i32 %in9, i32 %ina, i32 %inb, [8 x i32],
i32 %inc, i32 %ind, i32 %ine, i32 %inf) #0 {
entry:
store i32 %in0, ptr addrspace(1) %out0
store i32 %in1, ptr addrspace(1) %out1
store i32 %in2, ptr addrspace(1) %out2
store i32 %in3, ptr addrspace(1) %out3
store i32 %in4, ptr addrspace(1) %out4
store i32 %in5, ptr addrspace(1) %out5
store i32 %in6, ptr addrspace(1) %out6
store i32 %in7, ptr addrspace(1) %out7
store i32 %in8, ptr addrspace(1) %out8
store i32 %in9, ptr addrspace(1) %out9
store i32 %ina, ptr addrspace(1) %outa
store i32 %inb, ptr addrspace(1) %outb
store i32 %inc, ptr addrspace(1) %outc
store i32 %ind, ptr addrspace(1) %outd
store i32 %ine, ptr addrspace(1) %oute
store i32 %inf, ptr addrspace(1) %outf
ret void
}
; CHECK-LABEL: - Name: num_spilled_vgprs
; CHECK: SymbolName: 'num_spilled_vgprs@kd'
; CHECK: CodeProps:
; CHECK: NumSpilledVGPRs: {{13|14}}
define amdgpu_kernel void @num_spilled_vgprs() #1 {
%val0 = load volatile float, ptr addrspace(1) @var
%val1 = load volatile float, ptr addrspace(1) @var
%val2 = load volatile float, ptr addrspace(1) @var
%val3 = load volatile float, ptr addrspace(1) @var
%val4 = load volatile float, ptr addrspace(1) @var
%val5 = load volatile float, ptr addrspace(1) @var
%val6 = load volatile float, ptr addrspace(1) @var
%val7 = load volatile float, ptr addrspace(1) @var
%val8 = load volatile float, ptr addrspace(1) @var
%val9 = load volatile float, ptr addrspace(1) @var
%val10 = load volatile float, ptr addrspace(1) @var
%val11 = load volatile float, ptr addrspace(1) @var
%val12 = load volatile float, ptr addrspace(1) @var
%val13 = load volatile float, ptr addrspace(1) @var
%val14 = load volatile float, ptr addrspace(1) @var
%val15 = load volatile float, ptr addrspace(1) @var
%val16 = load volatile float, ptr addrspace(1) @var
%val17 = load volatile float, ptr addrspace(1) @var
%val18 = load volatile float, ptr addrspace(1) @var
%val19 = load volatile float, ptr addrspace(1) @var
%val20 = load volatile float, ptr addrspace(1) @var
%val21 = load volatile float, ptr addrspace(1) @var
%val22 = load volatile float, ptr addrspace(1) @var
%val23 = load volatile float, ptr addrspace(1) @var
%val24 = load volatile float, ptr addrspace(1) @var
%val25 = load volatile float, ptr addrspace(1) @var
%val26 = load volatile float, ptr addrspace(1) @var
%val27 = load volatile float, ptr addrspace(1) @var
%val28 = load volatile float, ptr addrspace(1) @var
%val29 = load volatile float, ptr addrspace(1) @var
%val30 = load volatile float, ptr addrspace(1) @var
store volatile float %val0, ptr addrspace(1) @var
store volatile float %val1, ptr addrspace(1) @var
store volatile float %val2, ptr addrspace(1) @var
store volatile float %val3, ptr addrspace(1) @var
store volatile float %val4, ptr addrspace(1) @var
store volatile float %val5, ptr addrspace(1) @var
store volatile float %val6, ptr addrspace(1) @var
store volatile float %val7, ptr addrspace(1) @var
store volatile float %val8, ptr addrspace(1) @var
store volatile float %val9, ptr addrspace(1) @var
store volatile float %val10, ptr addrspace(1) @var
store volatile float %val11, ptr addrspace(1) @var
store volatile float %val12, ptr addrspace(1) @var
store volatile float %val13, ptr addrspace(1) @var
store volatile float %val14, ptr addrspace(1) @var
store volatile float %val15, ptr addrspace(1) @var
store volatile float %val16, ptr addrspace(1) @var
store volatile float %val17, ptr addrspace(1) @var
store volatile float %val18, ptr addrspace(1) @var
store volatile float %val19, ptr addrspace(1) @var
store volatile float %val20, ptr addrspace(1) @var
store volatile float %val21, ptr addrspace(1) @var
store volatile float %val22, ptr addrspace(1) @var
store volatile float %val23, ptr addrspace(1) @var
store volatile float %val24, ptr addrspace(1) @var
store volatile float %val25, ptr addrspace(1) @var
store volatile float %val26, ptr addrspace(1) @var
store volatile float %val27, ptr addrspace(1) @var
store volatile float %val28, ptr addrspace(1) @var
store volatile float %val29, ptr addrspace(1) @var
store volatile float %val30, ptr addrspace(1) @var
ret void
}
attributes #0 = { "amdgpu-num-sgpr"="14" }
attributes #1 = { "amdgpu-num-vgpr"="20" }
attributes #2 = { "amdgpu-flat-work-group-size"="1,256" }
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 200}

View File

@ -1,62 +1,61 @@
; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx600 | FileCheck --check-prefixes=NONHSA-SI600 %s
; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx601 | FileCheck --check-prefixes=NONHSA-SI601 %s
; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx602 | FileCheck --check-prefixes=NONHSA-SI602 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx700 | FileCheck --check-prefixes=HSA,HSA-CI700 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | FileCheck --check-prefixes=HSA,HSA-CI700 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx701 | FileCheck --check-prefixes=HSA,HSA-CI701 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=hawaii | FileCheck --check-prefixes=HSA,HSA-CI701 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx702 | FileCheck --check-prefixes=HSA,HSA-CI702 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx703 | FileCheck --check-prefixes=HSA,HSA-CI703 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kabini | FileCheck --check-prefixes=HSA,HSA-CI703 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=mullins | FileCheck --check-prefixes=HSA,HSA-CI703 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx704 | FileCheck --check-prefixes=HSA,HSA-CI704 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=bonaire | FileCheck --check-prefixes=HSA,HSA-CI704 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx705 | FileCheck --check-prefixes=HSA,HSA-CI705 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx801 | FileCheck --check-prefixes=HSA,HSA-VI801 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo -mattr=-flat-for-global | FileCheck --check-prefixes=HSA,HSA-VI801 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx802 | FileCheck --check-prefixes=HSA,HSA-VI802 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=iceland -mattr=-flat-for-global | FileCheck --check-prefixes=HSA,HSA-VI802 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=tonga -mattr=-flat-for-global | FileCheck --check-prefixes=HSA,HSA-VI802 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx803 | FileCheck --check-prefixes=HSA,HSA-VI803 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=-flat-for-global | FileCheck --check-prefixes=HSA,HSA-VI803 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=polaris10 | FileCheck --check-prefixes=HSA,HSA-VI803 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=polaris11 | FileCheck --check-prefixes=HSA,HSA-VI803 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx805 | FileCheck --check-prefixes=HSA,HSA-VI805 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=tongapro | FileCheck --check-prefixes=HSA,HSA-VI805 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx810 | FileCheck --check-prefixes=HSA,HSA-VI810 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=stoney | FileCheck --check-prefixes=HSA,HSA-VI810 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=-xnack | FileCheck --check-prefixes=HSA,HSA-GFX900 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=HSA,HSA-GFX901 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx902 -mattr=-xnack | FileCheck --check-prefixes=HSA,HSA-GFX902 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx902 | FileCheck --check-prefixes=HSA,HSA-GFX903 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx904 -mattr=-xnack | FileCheck --check-prefixes=HSA,HSA-GFX904 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx904 | FileCheck --check-prefixes=HSA,HSA-GFX905 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx906 -mattr=-xnack | FileCheck --check-prefixes=HSA,HSA-GFX906 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=HSA,HSA-GFX907 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx700 | FileCheck --check-prefix=HSA-CI700 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | FileCheck --check-prefix=HSA-CI700 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx701 | FileCheck --check-prefix=HSA-CI701 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=hawaii | FileCheck --check-prefix=HSA-CI701 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx702 | FileCheck --check-prefix=HSA-CI702 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx703 | FileCheck --check-prefix=HSA-CI703 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kabini | FileCheck --check-prefix=HSA-CI703 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=mullins | FileCheck --check-prefix=HSA-CI703 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx704 | FileCheck --check-prefix=HSA-CI704 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=bonaire | FileCheck --check-prefix=HSA-CI704 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx705 | FileCheck --check-prefix=HSA-CI705 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx801 | FileCheck --check-prefix=HSA-VI801 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo -mattr=-flat-for-global | FileCheck --check-prefix=HSA-VI801 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx802 | FileCheck --check-prefix=HSA-VI802 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=iceland -mattr=-flat-for-global | FileCheck --check-prefix=HSA-VI802 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=tonga -mattr=-flat-for-global | FileCheck --check-prefix=HSA-VI802 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx803 | FileCheck --check-prefix=HSA-VI803 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=-flat-for-global | FileCheck --check-prefix=HSA-VI803 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=polaris10 | FileCheck --check-prefix=HSA-VI803 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=polaris11 | FileCheck --check-prefix=HSA-VI803 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx805 | FileCheck --check-prefix=HSA-VI805 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=tongapro | FileCheck --check-prefix=HSA-VI805 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx810 | FileCheck --check-prefix=HSA-VI810 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=stoney | FileCheck --check-prefix=HSA-VI810 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=-xnack | FileCheck --check-prefix=HSA-GFX900 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx900 | FileCheck --check-prefix=HSA-GFX901 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx902 -mattr=-xnack | FileCheck --check-prefix=HSA-GFX902 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx902 | FileCheck --check-prefix=HSA-GFX903 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx904 -mattr=-xnack | FileCheck --check-prefix=HSA-GFX904 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx904 | FileCheck --check-prefix=HSA-GFX905 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx906 -mattr=-xnack | FileCheck --check-prefix=HSA-GFX906 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx906 | FileCheck --check-prefix=HSA-GFX907 %s
; HSA: .hsa_code_object_version 2,1
; NONHSA-SI600: .amd_amdgpu_isa "amdgcn-unknown-unknown--gfx600"
; NONHSA-SI601: .amd_amdgpu_isa "amdgcn-unknown-unknown--gfx601"
; NONHSA-SI602: .amd_amdgpu_isa "amdgcn-unknown-unknown--gfx602"
; HSA-CI700: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
; HSA-CI701: .hsa_code_object_isa 7,0,1,"AMD","AMDGPU"
; HSA-CI702: .hsa_code_object_isa 7,0,2,"AMD","AMDGPU"
; HSA-CI703: .hsa_code_object_isa 7,0,3,"AMD","AMDGPU"
; HSA-CI704: .hsa_code_object_isa 7,0,4,"AMD","AMDGPU"
; HSA-CI705: .hsa_code_object_isa 7,0,5,"AMD","AMDGPU"
; HSA-VI801: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU"
; HSA-VI802: .hsa_code_object_isa 8,0,2,"AMD","AMDGPU"
; HSA-VI803: .hsa_code_object_isa 8,0,3,"AMD","AMDGPU"
; HSA-VI805: .hsa_code_object_isa 8,0,5,"AMD","AMDGPU"
; HSA-VI810: .hsa_code_object_isa 8,1,0,"AMD","AMDGPU"
; HSA-GFX900: .hsa_code_object_isa 9,0,0,"AMD","AMDGPU"
; HSA-GFX901: .hsa_code_object_isa 9,0,1,"AMD","AMDGPU"
; HSA-GFX902: .hsa_code_object_isa 9,0,2,"AMD","AMDGPU"
; HSA-GFX903: .hsa_code_object_isa 9,0,3,"AMD","AMDGPU"
; HSA-GFX904: .hsa_code_object_isa 9,0,4,"AMD","AMDGPU"
; HSA-GFX905: .hsa_code_object_isa 9,0,5,"AMD","AMDGPU"
; HSA-GFX906: .hsa_code_object_isa 9,0,6,"AMD","AMDGPU"
; HSA-GFX907: .hsa_code_object_isa 9,0,7,"AMD","AMDGPU"
; HSA-CI700: .amdgcn_target "amdgcn-unknown-amdhsa--gfx700"
; HSA-CI701: .amdgcn_target "amdgcn-unknown-amdhsa--gfx701"
; HSA-CI702: .amdgcn_target "amdgcn-unknown-amdhsa--gfx702"
; HSA-CI703: .amdgcn_target "amdgcn-unknown-amdhsa--gfx703"
; HSA-CI704: .amdgcn_target "amdgcn-unknown-amdhsa--gfx704"
; HSA-CI705: .amdgcn_target "amdgcn-unknown-amdhsa--gfx705"
; HSA-VI801: .amdgcn_target "amdgcn-unknown-amdhsa--gfx801"
; HSA-VI802: .amdgcn_target "amdgcn-unknown-amdhsa--gfx802"
; HSA-VI803: .amdgcn_target "amdgcn-unknown-amdhsa--gfx803"
; HSA-VI805: .amdgcn_target "amdgcn-unknown-amdhsa--gfx805"
; HSA-VI810: .amdgcn_target "amdgcn-unknown-amdhsa--gfx810"
; HSA-GFX900: .amdgcn_target "amdgcn-unknown-amdhsa--gfx900:xnack-"
; HSA-GFX901: .amdgcn_target "amdgcn-unknown-amdhsa--gfx900"
; HSA-GFX902: .amdgcn_target "amdgcn-unknown-amdhsa--gfx902:xnack-"
; HSA-GFX903: .amdgcn_target "amdgcn-unknown-amdhsa--gfx902"
; HSA-GFX904: .amdgcn_target "amdgcn-unknown-amdhsa--gfx904:xnack-"
; HSA-GFX905: .amdgcn_target "amdgcn-unknown-amdhsa--gfx904"
; HSA-GFX906: .amdgcn_target "amdgcn-unknown-amdhsa--gfx906:xnack-"
; HSA-GFX907: .amdgcn_target "amdgcn-unknown-amdhsa--gfx906"
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 200}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}

View File

@ -1,14 +1,14 @@
; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=kaveri | FileCheck --check-prefix=HSA %s
; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global | FileCheck --check-prefix=HSA-CI %s
; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=carrizo | FileCheck --check-prefix=HSA %s
; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=carrizo -mattr=-flat-for-global | FileCheck --check-prefix=HSA-VI %s
; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -filetype=obj | llvm-readobj -S --sd --syms - | FileCheck --check-prefix=ELF %s
; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=kaveri | llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 | llvm-readobj -S --sd --syms - | FileCheck %s --check-prefix=ELF
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 | FileCheck --check-prefix=GFX10 --check-prefix=GFX10-W32 %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 | FileCheck --check-prefix=GFX10 --check-prefix=GFX10-W64 %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 | FileCheck --check-prefix=GFX10 --check-prefix=GFX10-W32 %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 | FileCheck --check-prefix=GFX10 --check-prefix=GFX10-W64 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | FileCheck --check-prefix=HSA %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global | FileCheck --check-prefix=HSA-CI %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo | FileCheck --check-prefix=HSA %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo -mattr=-flat-for-global | FileCheck --check-prefix=HSA-VI %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri -filetype=obj | llvm-readobj -S --sd --syms - | FileCheck --check-prefix=ELF %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=4 | llvm-readobj -S --sd --syms - | FileCheck %s --check-prefix=ELF
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 | FileCheck --check-prefix=GFX10 --check-prefix=GFX10-W32 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 | FileCheck --check-prefix=GFX10 --check-prefix=GFX10-W64 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 | FileCheck --check-prefix=GFX10 --check-prefix=GFX10-W32 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 | FileCheck --check-prefix=GFX10 --check-prefix=GFX10-W64 %s
; The SHT_NOTE section contains the AMDGPU code object metadata note (with
; code object V2 it held the output of the .hsa_code_object_* directives).
@ -26,39 +26,66 @@
; ELF: SHF_ALLOC (0x2)
; ELF: ]
; ELF: SectionData (
; ELF: 0000: 04000000 08000000 01000000 414D4400
; ELF: 0010: 02000000 01000000 04000000 1B000000
; ELF: 0020: 03000000 414D4400 04000700 07000000
; ELF: 0030: 00000000 00000000 414D4400 414D4447
; ELF: 0040: 50550000
; ELF: 0000: 07000000 A8020000 20000000 414D4447
; ELF: 0010: 50550000 83AE616D 64687361 2E6B6572
; ELF: 0020: 6E656C73 928DA52E 61726773 9185AE2E
; ELF: 0030: 61646472 6573735F 73706163 65A6676C
; ELF: 0040: 6F62616C A52E6E61 6D65A36F 7574A72E
; ELF: 0050: 6F666673 657400A5 2E73697A 6508AB2E
; ELF: 0060: 76616C75 655F6B69 6E64AD67 6C6F6261
; ELF: 0070: 6C5F6275 66666572 B92E6772 6F75705F
; ELF: 0080: 7365676D 656E745F 66697865 645F7369
; ELF: 0090: 7A6500B6 2E6B6572 6E617267 5F736567
; ELF: 00A0: 6D656E74 5F616C69 676E08B5 2E6B6572
; ELF: 00B0: 6E617267 5F736567 6D656E74 5F73697A
; ELF: 00C0: 6508B82E 6D61785F 666C6174 5F776F72
; ELF: 00D0: 6B67726F 75705F73 697A65CD 0400A52E
; ELF: 00E0: 6E616D65 A673696D 706C65BB 2E707269
; ELF: 00F0: 76617465 5F736567 6D656E74 5F666978
; ELF: 0100: 65645F73 697A6500 AB2E7367 70725F63
; ELF: 0110: 6F756E74 06B12E73 6770725F 7370696C
; ELF: 0120: 6C5F636F 756E7400 A72E7379 6D626F6C
; ELF: 0130: A973696D 706C652E 6B64AB2E 76677072
; ELF: 0140: 5F636F75 6E7403B1 2E766770 725F7370
; ELF: 0150: 696C6C5F 636F756E 7400AF2E 77617665
; ELF: 0160: 66726F6E 745F7369 7A65408D A52E6172
; ELF: 0170: 677390B9 2E67726F 75705F73 65676D65
; ELF: 0180: 6E745F66 69786564 5F73697A 6500B62E
; ELF: 0190: 6B65726E 6172675F 7365676D 656E745F
; ELF: 01A0: 616C6967 6E04B52E 6B65726E 6172675F
; ELF: 01B0: 7365676D 656E745F 73697A65 00B82E6D
; ELF: 01C0: 61785F66 6C61745F 776F726B 67726F75
; ELF: 01D0: 705F7369 7A65CD04 00A52E6E 616D65B2
; ELF: 01E0: 73696D70 6C655F6E 6F5F6B65 726E6172
; ELF: 01F0: 6773BB2E 70726976 6174655F 7365676D
; ELF: 0200: 656E745F 66697865 645F7369 7A6500AB
; ELF: 0210: 2E736770 725F636F 756E7400 B12E7367
; ELF: 0220: 70725F73 70696C6C 5F636F75 6E7400A7
; ELF: 0230: 2E73796D 626F6CB5 73696D70 6C655F6E
; ELF: 0240: 6F5F6B65 726E6172 67732E6B 64AB2E76
; ELF: 0250: 6770725F 636F756E 7402B12E 76677072
; ELF: 0260: 5F737069 6C6C5F63 6F756E74 00AF2E77
; ELF: 0270: 61766566 726F6E74 5F73697A 6540AD61
; ELF: 0280: 6D646873 612E7461 72676574 BD616D64
; ELF: 0290: 67636E2D 756E6B6E 6F776E2D 616D6468
; ELF: 02A0: 73612D2D 67667837 3030AE61 6D646873
; ELF: 02B0: 612E7665 7273696F 6E920101
; ELF: )
; ELF: Symbol {
; ELF: Name: simple
; ELF: Size: 288
; ELF: Type: AMDGPU_HSA_KERNEL (0xA)
; ELF: Size: 32
; ELF: }
; HSA-NOT: .AMDGPU.config
; HSA: .text
; HSA: .hsa_code_object_version 2,1
; HSA-CI: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
; HSA-VI: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU"
; HSA-CI: .amdgcn_target "amdgcn-unknown-amdhsa--gfx700"
; HSA-VI: .amdgcn_target "amdgcn-unknown-amdhsa--gfx801"
; HSA-LABEL: .amdgpu_hsa_kernel simple
; HSA: {{^}}simple:
; HSA: .amd_kernel_code_t
; HSA: enable_sgpr_private_segment_buffer = 1
; HSA: enable_sgpr_kernarg_segment_ptr = 1
; PRE-GFX10: enable_wavefront_size32 = 0
; GFX10-W32: .amdhsa_wavefront_size32 1
; GFX10-W64: .amdhsa_wavefront_size32 0
; HSA-LABEL: {{^}}simple:
; PRE-GFX10: wavefront_size = 6
; HSA: call_convention = -1
; HSA: .end_amd_kernel_code_t
; HSA: s_load_{{dwordx2|b64}} s[{{[0-9]+:[0-9]+}}], s[4:5], 0x0
; Make sure we are setting the ATC bit:
@ -69,6 +96,13 @@
; PRE-GFX10: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}
; GFX10: global_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, off
; HSA: .amdhsa_user_sgpr_private_segment_buffer 1
; HSA: .amdhsa_user_sgpr_kernarg_segment_ptr 1
; PRE-GFX10-NOT: .amdhsa_wavefront_size32
; GFX10-W32: .amdhsa_wavefront_size32 1
; GFX10-W64: .amdhsa_wavefront_size32 0
; HSA: .Lfunc_end0:
; HSA: .size simple, .Lfunc_end0-simple
@ -78,8 +112,8 @@ entry:
ret void
}
; HSA-LABEL: .amdgpu_hsa_kernel simple_no_kernargs
; HSA: enable_sgpr_kernarg_segment_ptr = 0
; HSA-LABEL: {{^}}simple_no_kernargs:
; HSA: .amdhsa_user_sgpr_kernarg_segment_ptr 0
define amdgpu_kernel void @simple_no_kernargs() {
entry:
store volatile i32 0, ptr addrspace(1) undef
@ -87,4 +121,4 @@ entry:
}
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}

View File

@ -7,75 +7,7 @@
define amdgpu_kernel void @test_indirect_call_sgpr_ptr(i8) {
; GCN-LABEL: test_indirect_call_sgpr_ptr:
; GCN: .amd_kernel_code_t
; GCN-NEXT: amd_code_version_major = 1
; GCN-NEXT: amd_code_version_minor = 2
; GCN-NEXT: amd_machine_kind = 1
; GCN-NEXT: amd_machine_version_major = 7
; GCN-NEXT: amd_machine_version_minor = 0
; GCN-NEXT: amd_machine_version_stepping = 0
; GCN-NEXT: kernel_code_entry_byte_offset = 256
; GCN-NEXT: kernel_code_prefetch_byte_size = 0
; GCN-NEXT: granulated_workitem_vgpr_count = 10
; GCN-NEXT: granulated_wavefront_sgpr_count = 8
; GCN-NEXT: priority = 0
; GCN-NEXT: float_mode = 240
; GCN-NEXT: priv = 0
; GCN-NEXT: enable_dx10_clamp = 1
; GCN-NEXT: debug_mode = 0
; GCN-NEXT: enable_ieee_mode = 1
; GCN-NEXT: enable_wgp_mode = 0
; GCN-NEXT: enable_mem_ordered = 0
; GCN-NEXT: enable_fwd_progress = 0
; GCN-NEXT: enable_sgpr_private_segment_wave_byte_offset = 1
; GCN-NEXT: user_sgpr_count = 14
; GCN-NEXT: enable_trap_handler = 0
; GCN-NEXT: enable_sgpr_workgroup_id_x = 1
; GCN-NEXT: enable_sgpr_workgroup_id_y = 1
; GCN-NEXT: enable_sgpr_workgroup_id_z = 1
; GCN-NEXT: enable_sgpr_workgroup_info = 0
; GCN-NEXT: enable_vgpr_workitem_id = 2
; GCN-NEXT: enable_exception_msb = 0
; GCN-NEXT: granulated_lds_size = 0
; GCN-NEXT: enable_exception = 0
; GCN-NEXT: enable_sgpr_private_segment_buffer = 1
; GCN-NEXT: enable_sgpr_dispatch_ptr = 1
; GCN-NEXT: enable_sgpr_queue_ptr = 1
; GCN-NEXT: enable_sgpr_kernarg_segment_ptr = 1
; GCN-NEXT: enable_sgpr_dispatch_id = 1
; GCN-NEXT: enable_sgpr_flat_scratch_init = 1
; GCN-NEXT: enable_sgpr_private_segment_size = 0
; GCN-NEXT: enable_sgpr_grid_workgroup_count_x = 0
; GCN-NEXT: enable_sgpr_grid_workgroup_count_y = 0
; GCN-NEXT: enable_sgpr_grid_workgroup_count_z = 0
; GCN-NEXT: enable_wavefront_size32 = 0
; GCN-NEXT: enable_ordered_append_gds = 0
; GCN-NEXT: private_element_size = 1
; GCN-NEXT: is_ptr64 = 1
; GCN-NEXT: is_dynamic_callstack = 1
; GCN-NEXT: is_debug_enabled = 0
; GCN-NEXT: is_xnack_enabled = 0
; GCN-NEXT: workitem_private_segment_byte_size = 16384
; GCN-NEXT: workgroup_group_segment_byte_size = 0
; GCN-NEXT: gds_segment_byte_size = 0
; GCN-NEXT: kernarg_segment_byte_size = 64
; GCN-NEXT: workgroup_fbarrier_count = 0
; GCN-NEXT: wavefront_sgpr_count = 68
; GCN-NEXT: workitem_vgpr_count = 42
; GCN-NEXT: reserved_vgpr_first = 0
; GCN-NEXT: reserved_vgpr_count = 0
; GCN-NEXT: reserved_sgpr_first = 0
; GCN-NEXT: reserved_sgpr_count = 0
; GCN-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
; GCN-NEXT: debug_private_segment_buffer_sgpr = 0
; GCN-NEXT: kernarg_segment_alignment = 4
; GCN-NEXT: group_segment_alignment = 4
; GCN-NEXT: private_segment_alignment = 4
; GCN-NEXT: wavefront_size = 6
; GCN-NEXT: call_convention = -1
; GCN-NEXT: runtime_loader_kernel_symbol = 0
; GCN-NEXT: .end_amd_kernel_code_t
; GCN-NEXT: ; %bb.0:
; GCN: ; %bb.0:
; GCN-NEXT: s_mov_b32 s32, 0
; GCN-NEXT: s_mov_b32 flat_scratch_lo, s13
; GCN-NEXT: s_add_i32 s12, s12, s17
@ -100,75 +32,7 @@ define amdgpu_kernel void @test_indirect_call_sgpr_ptr(i8) {
; GCN-NEXT: s_endpgm
;
; GISEL-LABEL: test_indirect_call_sgpr_ptr:
; GISEL: .amd_kernel_code_t
; GISEL-NEXT: amd_code_version_major = 1
; GISEL-NEXT: amd_code_version_minor = 2
; GISEL-NEXT: amd_machine_kind = 1
; GISEL-NEXT: amd_machine_version_major = 7
; GISEL-NEXT: amd_machine_version_minor = 0
; GISEL-NEXT: amd_machine_version_stepping = 0
; GISEL-NEXT: kernel_code_entry_byte_offset = 256
; GISEL-NEXT: kernel_code_prefetch_byte_size = 0
; GISEL-NEXT: granulated_workitem_vgpr_count = 10
; GISEL-NEXT: granulated_wavefront_sgpr_count = 8
; GISEL-NEXT: priority = 0
; GISEL-NEXT: float_mode = 240
; GISEL-NEXT: priv = 0
; GISEL-NEXT: enable_dx10_clamp = 1
; GISEL-NEXT: debug_mode = 0
; GISEL-NEXT: enable_ieee_mode = 1
; GISEL-NEXT: enable_wgp_mode = 0
; GISEL-NEXT: enable_mem_ordered = 0
; GISEL-NEXT: enable_fwd_progress = 0
; GISEL-NEXT: enable_sgpr_private_segment_wave_byte_offset = 1
; GISEL-NEXT: user_sgpr_count = 14
; GISEL-NEXT: enable_trap_handler = 0
; GISEL-NEXT: enable_sgpr_workgroup_id_x = 1
; GISEL-NEXT: enable_sgpr_workgroup_id_y = 1
; GISEL-NEXT: enable_sgpr_workgroup_id_z = 1
; GISEL-NEXT: enable_sgpr_workgroup_info = 0
; GISEL-NEXT: enable_vgpr_workitem_id = 2
; GISEL-NEXT: enable_exception_msb = 0
; GISEL-NEXT: granulated_lds_size = 0
; GISEL-NEXT: enable_exception = 0
; GISEL-NEXT: enable_sgpr_private_segment_buffer = 1
; GISEL-NEXT: enable_sgpr_dispatch_ptr = 1
; GISEL-NEXT: enable_sgpr_queue_ptr = 1
; GISEL-NEXT: enable_sgpr_kernarg_segment_ptr = 1
; GISEL-NEXT: enable_sgpr_dispatch_id = 1
; GISEL-NEXT: enable_sgpr_flat_scratch_init = 1
; GISEL-NEXT: enable_sgpr_private_segment_size = 0
; GISEL-NEXT: enable_sgpr_grid_workgroup_count_x = 0
; GISEL-NEXT: enable_sgpr_grid_workgroup_count_y = 0
; GISEL-NEXT: enable_sgpr_grid_workgroup_count_z = 0
; GISEL-NEXT: enable_wavefront_size32 = 0
; GISEL-NEXT: enable_ordered_append_gds = 0
; GISEL-NEXT: private_element_size = 1
; GISEL-NEXT: is_ptr64 = 1
; GISEL-NEXT: is_dynamic_callstack = 1
; GISEL-NEXT: is_debug_enabled = 0
; GISEL-NEXT: is_xnack_enabled = 0
; GISEL-NEXT: workitem_private_segment_byte_size = 16384
; GISEL-NEXT: workgroup_group_segment_byte_size = 0
; GISEL-NEXT: gds_segment_byte_size = 0
; GISEL-NEXT: kernarg_segment_byte_size = 64
; GISEL-NEXT: workgroup_fbarrier_count = 0
; GISEL-NEXT: wavefront_sgpr_count = 68
; GISEL-NEXT: workitem_vgpr_count = 42
; GISEL-NEXT: reserved_vgpr_first = 0
; GISEL-NEXT: reserved_vgpr_count = 0
; GISEL-NEXT: reserved_sgpr_first = 0
; GISEL-NEXT: reserved_sgpr_count = 0
; GISEL-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
; GISEL-NEXT: debug_private_segment_buffer_sgpr = 0
; GISEL-NEXT: kernarg_segment_alignment = 4
; GISEL-NEXT: group_segment_alignment = 4
; GISEL-NEXT: private_segment_alignment = 4
; GISEL-NEXT: wavefront_size = 6
; GISEL-NEXT: call_convention = -1
; GISEL-NEXT: runtime_loader_kernel_symbol = 0
; GISEL-NEXT: .end_amd_kernel_code_t
; GISEL-NEXT: ; %bb.0:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_mov_b32 s32, 0
; GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13
; GISEL-NEXT: s_add_i32 s12, s12, s17
@ -198,75 +62,7 @@ define amdgpu_kernel void @test_indirect_call_sgpr_ptr(i8) {
define amdgpu_kernel void @test_indirect_call_sgpr_ptr_arg(i8) {
; GCN-LABEL: test_indirect_call_sgpr_ptr_arg:
; GCN: .amd_kernel_code_t
; GCN-NEXT: amd_code_version_major = 1
; GCN-NEXT: amd_code_version_minor = 2
; GCN-NEXT: amd_machine_kind = 1
; GCN-NEXT: amd_machine_version_major = 7
; GCN-NEXT: amd_machine_version_minor = 0
; GCN-NEXT: amd_machine_version_stepping = 0
; GCN-NEXT: kernel_code_entry_byte_offset = 256
; GCN-NEXT: kernel_code_prefetch_byte_size = 0
; GCN-NEXT: granulated_workitem_vgpr_count = 10
; GCN-NEXT: granulated_wavefront_sgpr_count = 8
; GCN-NEXT: priority = 0
; GCN-NEXT: float_mode = 240
; GCN-NEXT: priv = 0
; GCN-NEXT: enable_dx10_clamp = 1
; GCN-NEXT: debug_mode = 0
; GCN-NEXT: enable_ieee_mode = 1
; GCN-NEXT: enable_wgp_mode = 0
; GCN-NEXT: enable_mem_ordered = 0
; GCN-NEXT: enable_fwd_progress = 0
; GCN-NEXT: enable_sgpr_private_segment_wave_byte_offset = 1
; GCN-NEXT: user_sgpr_count = 14
; GCN-NEXT: enable_trap_handler = 0
; GCN-NEXT: enable_sgpr_workgroup_id_x = 1
; GCN-NEXT: enable_sgpr_workgroup_id_y = 1
; GCN-NEXT: enable_sgpr_workgroup_id_z = 1
; GCN-NEXT: enable_sgpr_workgroup_info = 0
; GCN-NEXT: enable_vgpr_workitem_id = 2
; GCN-NEXT: enable_exception_msb = 0
; GCN-NEXT: granulated_lds_size = 0
; GCN-NEXT: enable_exception = 0
; GCN-NEXT: enable_sgpr_private_segment_buffer = 1
; GCN-NEXT: enable_sgpr_dispatch_ptr = 1
; GCN-NEXT: enable_sgpr_queue_ptr = 1
; GCN-NEXT: enable_sgpr_kernarg_segment_ptr = 1
; GCN-NEXT: enable_sgpr_dispatch_id = 1
; GCN-NEXT: enable_sgpr_flat_scratch_init = 1
; GCN-NEXT: enable_sgpr_private_segment_size = 0
; GCN-NEXT: enable_sgpr_grid_workgroup_count_x = 0
; GCN-NEXT: enable_sgpr_grid_workgroup_count_y = 0
; GCN-NEXT: enable_sgpr_grid_workgroup_count_z = 0
; GCN-NEXT: enable_wavefront_size32 = 0
; GCN-NEXT: enable_ordered_append_gds = 0
; GCN-NEXT: private_element_size = 1
; GCN-NEXT: is_ptr64 = 1
; GCN-NEXT: is_dynamic_callstack = 1
; GCN-NEXT: is_debug_enabled = 0
; GCN-NEXT: is_xnack_enabled = 0
; GCN-NEXT: workitem_private_segment_byte_size = 16384
; GCN-NEXT: workgroup_group_segment_byte_size = 0
; GCN-NEXT: gds_segment_byte_size = 0
; GCN-NEXT: kernarg_segment_byte_size = 64
; GCN-NEXT: workgroup_fbarrier_count = 0
; GCN-NEXT: wavefront_sgpr_count = 68
; GCN-NEXT: workitem_vgpr_count = 42
; GCN-NEXT: reserved_vgpr_first = 0
; GCN-NEXT: reserved_vgpr_count = 0
; GCN-NEXT: reserved_sgpr_first = 0
; GCN-NEXT: reserved_sgpr_count = 0
; GCN-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
; GCN-NEXT: debug_private_segment_buffer_sgpr = 0
; GCN-NEXT: kernarg_segment_alignment = 4
; GCN-NEXT: group_segment_alignment = 4
; GCN-NEXT: private_segment_alignment = 4
; GCN-NEXT: wavefront_size = 6
; GCN-NEXT: call_convention = -1
; GCN-NEXT: runtime_loader_kernel_symbol = 0
; GCN-NEXT: .end_amd_kernel_code_t
; GCN-NEXT: ; %bb.0:
; GCN: ; %bb.0:
; GCN-NEXT: s_mov_b32 s32, 0
; GCN-NEXT: s_mov_b32 flat_scratch_lo, s13
; GCN-NEXT: s_add_i32 s12, s12, s17
@ -292,75 +88,7 @@ define amdgpu_kernel void @test_indirect_call_sgpr_ptr_arg(i8) {
; GCN-NEXT: s_endpgm
;
; GISEL-LABEL: test_indirect_call_sgpr_ptr_arg:
; GISEL: .amd_kernel_code_t
; GISEL-NEXT: amd_code_version_major = 1
; GISEL-NEXT: amd_code_version_minor = 2
; GISEL-NEXT: amd_machine_kind = 1
; GISEL-NEXT: amd_machine_version_major = 7
; GISEL-NEXT: amd_machine_version_minor = 0
; GISEL-NEXT: amd_machine_version_stepping = 0
; GISEL-NEXT: kernel_code_entry_byte_offset = 256
; GISEL-NEXT: kernel_code_prefetch_byte_size = 0
; GISEL-NEXT: granulated_workitem_vgpr_count = 10
; GISEL-NEXT: granulated_wavefront_sgpr_count = 8
; GISEL-NEXT: priority = 0
; GISEL-NEXT: float_mode = 240
; GISEL-NEXT: priv = 0
; GISEL-NEXT: enable_dx10_clamp = 1
; GISEL-NEXT: debug_mode = 0
; GISEL-NEXT: enable_ieee_mode = 1
; GISEL-NEXT: enable_wgp_mode = 0
; GISEL-NEXT: enable_mem_ordered = 0
; GISEL-NEXT: enable_fwd_progress = 0
; GISEL-NEXT: enable_sgpr_private_segment_wave_byte_offset = 1
; GISEL-NEXT: user_sgpr_count = 14
; GISEL-NEXT: enable_trap_handler = 0
; GISEL-NEXT: enable_sgpr_workgroup_id_x = 1
; GISEL-NEXT: enable_sgpr_workgroup_id_y = 1
; GISEL-NEXT: enable_sgpr_workgroup_id_z = 1
; GISEL-NEXT: enable_sgpr_workgroup_info = 0
; GISEL-NEXT: enable_vgpr_workitem_id = 2
; GISEL-NEXT: enable_exception_msb = 0
; GISEL-NEXT: granulated_lds_size = 0
; GISEL-NEXT: enable_exception = 0
; GISEL-NEXT: enable_sgpr_private_segment_buffer = 1
; GISEL-NEXT: enable_sgpr_dispatch_ptr = 1
; GISEL-NEXT: enable_sgpr_queue_ptr = 1
; GISEL-NEXT: enable_sgpr_kernarg_segment_ptr = 1
; GISEL-NEXT: enable_sgpr_dispatch_id = 1
; GISEL-NEXT: enable_sgpr_flat_scratch_init = 1
; GISEL-NEXT: enable_sgpr_private_segment_size = 0
; GISEL-NEXT: enable_sgpr_grid_workgroup_count_x = 0
; GISEL-NEXT: enable_sgpr_grid_workgroup_count_y = 0
; GISEL-NEXT: enable_sgpr_grid_workgroup_count_z = 0
; GISEL-NEXT: enable_wavefront_size32 = 0
; GISEL-NEXT: enable_ordered_append_gds = 0
; GISEL-NEXT: private_element_size = 1
; GISEL-NEXT: is_ptr64 = 1
; GISEL-NEXT: is_dynamic_callstack = 1
; GISEL-NEXT: is_debug_enabled = 0
; GISEL-NEXT: is_xnack_enabled = 0
; GISEL-NEXT: workitem_private_segment_byte_size = 16384
; GISEL-NEXT: workgroup_group_segment_byte_size = 0
; GISEL-NEXT: gds_segment_byte_size = 0
; GISEL-NEXT: kernarg_segment_byte_size = 64
; GISEL-NEXT: workgroup_fbarrier_count = 0
; GISEL-NEXT: wavefront_sgpr_count = 68
; GISEL-NEXT: workitem_vgpr_count = 42
; GISEL-NEXT: reserved_vgpr_first = 0
; GISEL-NEXT: reserved_vgpr_count = 0
; GISEL-NEXT: reserved_sgpr_first = 0
; GISEL-NEXT: reserved_sgpr_count = 0
; GISEL-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
; GISEL-NEXT: debug_private_segment_buffer_sgpr = 0
; GISEL-NEXT: kernarg_segment_alignment = 4
; GISEL-NEXT: group_segment_alignment = 4
; GISEL-NEXT: private_segment_alignment = 4
; GISEL-NEXT: wavefront_size = 6
; GISEL-NEXT: call_convention = -1
; GISEL-NEXT: runtime_loader_kernel_symbol = 0
; GISEL-NEXT: .end_amd_kernel_code_t
; GISEL-NEXT: ; %bb.0:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_mov_b32 s32, 0
; GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13
; GISEL-NEXT: s_add_i32 s12, s12, s17
@ -1899,4 +1627,4 @@ define void @test_indirect_tail_call_vgpr_ptr(ptr %fptr) {
}
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 200}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}

View File

@ -5,21 +5,22 @@
; accounted for, as well as legalization of types changing offsets.
; FUNC-LABEL: {{^}}i1_arg:
; HSA-VI: kernarg_segment_byte_size = 12
; HSA-VI: kernarg_segment_alignment = 4
; GCN: s_load_dword s
; GCN: s_and_b32
; HSA-VI: .amdhsa_kernarg_size 12
; Kernel that stores its i1 argument %x through the address space 1 pointer
; %out (align 1) and then returns.
define amdgpu_kernel void @i1_arg(ptr addrspace(1) %out, i1 %x) nounwind {
store i1 %x, ptr addrspace(1) %out, align 1
ret void
}
; FUNC-LABEL: {{^}}v3i8_arg:
; HSA-VI: kernarg_segment_byte_size = 12
; HSA-VI: kernarg_segment_alignment = 4
; HSA-VI: s_load_dword s{{[0-9]+}}, s[4:5], 0x8
; HSA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x0
; HSA-VI: .amdhsa_kernarg_size 12
define amdgpu_kernel void @v3i8_arg(ptr addrspace(1) nocapture %out, <3 x i8> %in) nounwind {
entry:
store <3 x i8> %in, ptr addrspace(1) %out, align 4
@ -27,9 +28,9 @@ entry:
}
; FUNC-LABEL: {{^}}i65_arg:
; HSA-VI: kernarg_segment_byte_size = 24
; HSA-VI: kernarg_segment_alignment = 4
; HSA-VI: s_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x0
; HSA-VI: .amdhsa_kernarg_size 24
define amdgpu_kernel void @i65_arg(ptr addrspace(1) nocapture %out, i65 %in) nounwind {
entry:
store i65 %in, ptr addrspace(1) %out, align 4
@ -37,7 +38,7 @@ entry:
}
; FUNC-LABEL: {{^}}empty_struct_arg:
; HSA-VI: kernarg_segment_byte_size = 0
; HSA-VI: .amdhsa_kernarg_size 0
; Kernel whose only parameter is an empty struct; the body just returns.
define amdgpu_kernel void @empty_struct_arg({} %in) nounwind {
ret void
}
@ -53,11 +54,12 @@ define amdgpu_kernel void @empty_struct_arg({} %in) nounwind {
; FIXME: The total argument size is computed incorrectly.
; FUNC-LABEL: {{^}}struct_argument_alignment:
; HSA-VI: kernarg_segment_byte_size = 40
; HSA-VI: s_load_dword s{{[0-9]+}}, s[4:5], 0x0
; HSA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x8
; HSA-VI: s_load_dword s{{[0-9]+}}, s[4:5], 0x18
; HSA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x20
; HSA-VI: .amdhsa_kernarg_size 40
define amdgpu_kernel void @struct_argument_alignment({i32, i64} %arg0, i8, {i32, i64} %arg1) {
%val0 = extractvalue {i32, i64} %arg0, 0
%val1 = extractvalue {i32, i64} %arg0, 1
@ -73,12 +75,13 @@ define amdgpu_kernel void @struct_argument_alignment({i32, i64} %arg0, i8, {i32,
; No padding between i8 and next struct, but round up at end to 4 byte
; multiple.
; FUNC-LABEL: {{^}}packed_struct_argument_alignment:
; HSA-VI: kernarg_segment_byte_size = 28
; HSA-VI-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; HSA-VI: global_load_dword v{{[0-9]+}}, [[ZERO]], s{{\[[0-9]+:[0-9]+\]}} offset:13
; HSA-VI: global_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]], s{{\[[0-9]+:[0-9]+\]}} offset:17
; HSA-VI: s_load_dword s{{[0-9]+}}, s[4:5], 0x0
; HSA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x4
; HSA-VI: .amdhsa_kernarg_size 28
define amdgpu_kernel void @packed_struct_argument_alignment(<{i32, i64}> %arg0, i8, <{i32, i64}> %arg1) {
%val0 = extractvalue <{i32, i64}> %arg0, 0
%val1 = extractvalue <{i32, i64}> %arg0, 1
@ -92,12 +95,13 @@ define amdgpu_kernel void @packed_struct_argument_alignment(<{i32, i64}> %arg0,
}
; GCN-LABEL: {{^}}struct_argument_alignment_after:
; HSA-VI: kernarg_segment_byte_size = 64
; HSA-VI: s_load_dword s{{[0-9]+}}, s[4:5], 0x0
; HSA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x8
; HSA-VI: s_load_dword s{{[0-9]+}}, s[4:5], 0x18
; HSA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x20
; HSA-VI: s_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x30
; HSA-VI: .amdhsa_kernarg_size 64
define amdgpu_kernel void @struct_argument_alignment_after({i32, i64} %arg0, i8, {i32, i64} %arg2, i8, <4 x i32> %arg4) {
%val0 = extractvalue {i32, i64} %arg0, 0
%val1 = extractvalue {i32, i64} %arg0, 1
@ -151,9 +155,9 @@ entry:
; Byref pointers should only be treated as offsets from kernarg
; GCN-LABEL: {{^}}byref_constant_i8_arg:
; GCN: kernarg_segment_byte_size = 12
; GCN: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; GCN: global_load_ubyte v{{[0-9]+}}, [[ZERO]], s[4:5] offset:8
; GCN: .amdhsa_kernarg_size 12
define amdgpu_kernel void @byref_constant_i8_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(i8) %in.byref) {
%in = load i8, ptr addrspace(4) %in.byref
%ext = zext i8 %in to i32
@ -162,9 +166,9 @@ define amdgpu_kernel void @byref_constant_i8_arg(ptr addrspace(1) nocapture %out
}
; GCN-LABEL: {{^}}byref_constant_i16_arg:
; GCN: kernarg_segment_byte_size = 12
; GCN: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; GCN: global_load_ushort v{{[0-9]+}}, [[ZERO]], s[4:5] offset:8
; GCN: .amdhsa_kernarg_size 12
define amdgpu_kernel void @byref_constant_i16_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(i16) %in.byref) {
%in = load i16, ptr addrspace(4) %in.byref
%ext = zext i16 %in to i32
@ -173,8 +177,8 @@ define amdgpu_kernel void @byref_constant_i16_arg(ptr addrspace(1) nocapture %ou
}
; GCN-LABEL: {{^}}byref_constant_i32_arg:
; GCN: kernarg_segment_byte_size = 16
; GCN: s_load_dwordx4 [[LOAD:s\[[0-9]+:[0-9]+\]]], s[4:5], 0x0{{$}}
; GCN: .amdhsa_kernarg_size 16
define amdgpu_kernel void @byref_constant_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(i32) %in.byref, i32 %after.offset) {
%in = load i32, ptr addrspace(4) %in.byref
store volatile i32 %in, ptr addrspace(1) %out, align 4
@ -183,9 +187,9 @@ define amdgpu_kernel void @byref_constant_i32_arg(ptr addrspace(1) nocapture %ou
}
; GCN-LABEL: {{^}}byref_constant_v4i32_arg:
; GCN: kernarg_segment_byte_size = 36
; GCN: s_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x10{{$}}
; GCN: s_load_dword s{{[0-9]+}}, s[4:5], 0x20{{$}}
; GCN: .amdhsa_kernarg_size 36
define amdgpu_kernel void @byref_constant_v4i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(<4 x i32>) %in.byref, i32 %after.offset) {
%in = load <4 x i32>, ptr addrspace(4) %in.byref
store volatile <4 x i32> %in, ptr addrspace(1) %out, align 4
@ -194,12 +198,12 @@ define amdgpu_kernel void @byref_constant_v4i32_arg(ptr addrspace(1) nocapture %
}
; GCN-LABEL: {{^}}byref_align_constant_i32_arg:
; GCN: kernarg_segment_byte_size = 264
; GCN-DAG: s_load_dwordx2 s[[[IN:[0-9]+]]:[[AFTER_OFFSET:[0-9]+]]], s[4:5], 0x100{{$}}
; GCN-DAG: v_mov_b32_e32 [[V_IN:v[0-9]+]], s[[IN]]
; GCN-DAG: v_mov_b32_e32 [[V_AFTER_OFFSET:v[0-9]+]], s[[AFTER_OFFSET]]
; GCN: global_store_dword v{{[0-9]+}}, [[V_IN]], s
; GCN: global_store_dword v{{[0-9]+}}, [[V_AFTER_OFFSET]], s
; GCN: .amdhsa_kernarg_size 264
define amdgpu_kernel void @byref_align_constant_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(i32) align(256) %in.byref, i32 %after.offset) {
%in = load i32, ptr addrspace(4) %in.byref
store volatile i32 %in, ptr addrspace(1) %out, align 4
@ -208,9 +212,9 @@ define amdgpu_kernel void @byref_align_constant_i32_arg(ptr addrspace(1) nocaptu
}
; GCN-LABEL: {{^}}byref_natural_align_constant_v16i32_arg:
; GCN: kernarg_segment_byte_size = 132
; GCN-DAG: s_load_dword s{{[0-9]+}}, s[4:5], 0x80
; GCN-DAG: s_load_dwordx16 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x40{{$}}
; GCN: .amdhsa_kernarg_size 132
define amdgpu_kernel void @byref_natural_align_constant_v16i32_arg(ptr addrspace(1) nocapture %out, i8, ptr addrspace(4) byref(<16 x i32>) align(64) %in.byref, i32 %after.offset) {
%in = load <16 x i32>, ptr addrspace(4) %in.byref
store volatile <16 x i32> %in, ptr addrspace(1) %out, align 4
@ -220,8 +224,8 @@ define amdgpu_kernel void @byref_natural_align_constant_v16i32_arg(ptr addrspace
; Also accept byref kernel arguments with other global address spaces.
; GCN-LABEL: {{^}}byref_global_i32_arg:
; GCN: kernarg_segment_byte_size = 12
; GCN: s_load_dword [[IN:s[0-9]+]], s[4:5], 0x8{{$}}
; GCN: .amdhsa_kernarg_size 12
define amdgpu_kernel void @byref_global_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(1) byref(i32) %in.byref) {
%in = load i32, ptr addrspace(1) %in.byref
store i32 %in, ptr addrspace(1) %out, align 4
@ -253,8 +257,8 @@ define amdgpu_kernel void @byref_constant_32bit_i32_arg(ptr addrspace(1) nocaptu
; }
; GCN-LABEL: {{^}}multi_byref_constant_i32_arg:
; GCN: kernarg_segment_byte_size = 20
; GCN: s_load_dwordx4 {{s\[[0-9]+:[0-9]+\]}}, s[4:5], 0x0
; GCN: .amdhsa_kernarg_size 20
define amdgpu_kernel void @multi_byref_constant_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(i32) %in0.byref, ptr addrspace(4) byref(i32) %in1.byref, i32 %after.offset) {
%in0 = load i32, ptr addrspace(4) %in0.byref
%in1 = load i32, ptr addrspace(4) %in1.byref
@ -265,10 +269,10 @@ define amdgpu_kernel void @multi_byref_constant_i32_arg(ptr addrspace(1) nocaptu
}
; GCN-LABEL: {{^}}byref_constant_i32_arg_offset0:
; GCN: kernarg_segment_byte_size = 4
; GCN-NOT: s4
; GCN-NOT: s5
; GCN: s_load_dword {{s[0-9]+}}, s[4:5], 0x0{{$}}
; GCN: .amdhsa_kernarg_size 4
define amdgpu_kernel void @byref_constant_i32_arg_offset0(ptr addrspace(4) byref(i32) %in.byref) {
%in = load i32, ptr addrspace(4) %in.byref
store i32 %in, ptr addrspace(1) undef, align 4
@ -276,4 +280,4 @@ define amdgpu_kernel void @byref_constant_i32_arg_offset0(ptr addrspace(4) byref
}
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 200}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}

View File

@ -14,7 +14,7 @@ declare void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) nocapture, ptr addrspace(3)
; HSA-LABEL: {{^}}test_no_round_size_1:
; HSA: workgroup_group_segment_byte_size = 38
; HSA: .amdhsa_group_segment_fixed_size 38
define amdgpu_kernel void @test_no_round_size_1(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
call void @llvm.memcpy.p3.p1.i32(ptr addrspace(3) align 4 @lds.align16.0, ptr addrspace(1) align 4 %in, i32 38, i1 false)
call void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) align 4 %out, ptr addrspace(3) align 4 @lds.align16.0, i32 38, i1 false)
@ -31,8 +31,7 @@ define amdgpu_kernel void @test_no_round_size_1(ptr addrspace(1) %out, ptr addrs
; add the alignment padding if needed.
; HSA-LABEL: {{^}}test_round_size_2:
; HSA: workgroup_group_segment_byte_size = 86
; HSA: group_segment_alignment = 4
; HSA: .amdhsa_group_segment_fixed_size 86
define amdgpu_kernel void @test_round_size_2(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
call void @llvm.memcpy.p3.p1.i32(ptr addrspace(3) align 4 @lds.align16.0, ptr addrspace(1) align 4 %in, i32 38, i1 false)
call void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) align 4 %out, ptr addrspace(3) align 4 @lds.align16.0, i32 38, i1 false)
@ -45,8 +44,7 @@ define amdgpu_kernel void @test_round_size_2(ptr addrspace(1) %out, ptr addrspac
; 38 + (10 pad) + 38 (= 86)
; HSA-LABEL: {{^}}test_round_size_2_align_8:
; HSA: workgroup_group_segment_byte_size = 86
; HSA: group_segment_alignment = 4
; HSA: .amdhsa_group_segment_fixed_size 86
define amdgpu_kernel void @test_round_size_2_align_8(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
call void @llvm.memcpy.p3.p1.i32(ptr addrspace(3) align 8 @lds.align16.0, ptr addrspace(1) align 8 %in, i32 38, i1 false)
call void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) align 8 %out, ptr addrspace(3) align 8 @lds.align16.0, i32 38, i1 false)
@ -58,8 +56,7 @@ define amdgpu_kernel void @test_round_size_2_align_8(ptr addrspace(1) %out, ptr
}
; HSA-LABEL: {{^}}test_round_local_lds_and_arg:
; HSA: workgroup_group_segment_byte_size = 38
; HSA: group_segment_alignment = 4
; HSA: .amdhsa_group_segment_fixed_size 38
define amdgpu_kernel void @test_round_local_lds_and_arg(ptr addrspace(1) %out, ptr addrspace(1) %in, ptr addrspace(3) %lds.arg) #1 {
call void @llvm.memcpy.p3.p1.i32(ptr addrspace(3) align 4 @lds.align16.0, ptr addrspace(1) align 4 %in, i32 38, i1 false)
@ -70,8 +67,7 @@ define amdgpu_kernel void @test_round_local_lds_and_arg(ptr addrspace(1) %out, p
}
; HSA-LABEL: {{^}}test_round_lds_arg:
; HSA: workgroup_group_segment_byte_size = 0
; HSA: group_segment_alignment = 4
; HSA: .amdhsa_group_segment_fixed_size 0
define amdgpu_kernel void @test_round_lds_arg(ptr addrspace(1) %out, ptr addrspace(1) %in, ptr addrspace(3) %lds.arg) #1 {
call void @llvm.memcpy.p3.p1.i32(ptr addrspace(3) align 4 %lds.arg, ptr addrspace(1) align 4 %in, i32 38, i1 false)
call void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) align 4 %out, ptr addrspace(3) align 4 %lds.arg, i32 38, i1 false)
@ -80,8 +76,7 @@ define amdgpu_kernel void @test_round_lds_arg(ptr addrspace(1) %out, ptr addrspa
; FIXME: Parameter alignment not considered
; HSA-LABEL: {{^}}test_high_align_lds_arg:
; HSA: workgroup_group_segment_byte_size = 0
; HSA: group_segment_alignment = 4
; HSA: .amdhsa_group_segment_fixed_size 0
define amdgpu_kernel void @test_high_align_lds_arg(ptr addrspace(1) %out, ptr addrspace(1) %in, ptr addrspace(3) align 64 %lds.arg) #1 {
call void @llvm.memcpy.p3.p1.i32(ptr addrspace(3) align 64 %lds.arg, ptr addrspace(1) align 64 %in, i32 38, i1 false)
call void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) align 64 %out, ptr addrspace(3) align 64 %lds.arg, i32 38, i1 false)
@ -90,8 +85,7 @@ define amdgpu_kernel void @test_high_align_lds_arg(ptr addrspace(1) %out, ptr ad
; (39 * 4) + (4 pad) + (7 * 8) = 216
; HSA-LABEL: {{^}}test_missing_alignment_size_2_order0:
; HSA: workgroup_group_segment_byte_size = 216
; HSA: group_segment_alignment = 4
; HSA: .amdhsa_group_segment_fixed_size 216
define amdgpu_kernel void @test_missing_alignment_size_2_order0(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
call void @llvm.memcpy.p3.p1.i32(ptr addrspace(3) align 4 @lds.missing.align.0, ptr addrspace(1) align 4 %in, i32 160, i1 false)
call void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) align 4 %out, ptr addrspace(3) align 4 @lds.missing.align.0, i32 160, i1 false)
@ -104,8 +98,7 @@ define amdgpu_kernel void @test_missing_alignment_size_2_order0(ptr addrspace(1)
; (39 * 4) + (4 pad) + (7 * 8) = 216
; HSA-LABEL: {{^}}test_missing_alignment_size_2_order1:
; HSA: workgroup_group_segment_byte_size = 216
; HSA: group_segment_alignment = 4
; HSA: .amdhsa_group_segment_fixed_size 216
define amdgpu_kernel void @test_missing_alignment_size_2_order1(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
call void @llvm.memcpy.p3.p1.i32(ptr addrspace(3) align 8 @lds.missing.align.1, ptr addrspace(1) align 8 %in, i32 56, i1 false)
call void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) align 8 %out, ptr addrspace(3) align 8 @lds.missing.align.1, i32 56, i1 false)
@ -119,8 +112,7 @@ define amdgpu_kernel void @test_missing_alignment_size_2_order1(ptr addrspace(1)
; align 32, 16, 16
; 38 + (10 pad) + 38 + (10 pad) + 38 ( = 134)
; HSA-LABEL: {{^}}test_round_size_3_order0:
; HSA: workgroup_group_segment_byte_size = 134
; HSA: group_segment_alignment = 4
; HSA: .amdhsa_group_segment_fixed_size 134
define amdgpu_kernel void @test_round_size_3_order0(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
call void @llvm.memcpy.p3.p1.i32(ptr addrspace(3) align 8 @lds.align32.0, ptr addrspace(1) align 8 %in, i32 38, i1 false)
call void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) align 8 %out, ptr addrspace(3) align 8 @lds.align32.0, i32 38, i1 false)
@ -137,8 +129,7 @@ define amdgpu_kernel void @test_round_size_3_order0(ptr addrspace(1) %out, ptr a
; align 32, 16, 16
; 38 (+ 10 pad) + 38 + (10 pad) + 38 ( = 134)
; HSA-LABEL: {{^}}test_round_size_3_order1:
; HSA: workgroup_group_segment_byte_size = 134
; HSA: group_segment_alignment = 4
; HSA: .amdhsa_group_segment_fixed_size 134
define amdgpu_kernel void @test_round_size_3_order1(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
call void @llvm.memcpy.p3.p1.i32(ptr addrspace(3) align 8 @lds.align32.0, ptr addrspace(1) align 8 %in, i32 38, i1 false)
call void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) align 8 %out, ptr addrspace(3) align 8 @lds.align32.0, i32 38, i1 false)
@ -155,8 +146,7 @@ define amdgpu_kernel void @test_round_size_3_order1(ptr addrspace(1) %out, ptr a
; align 32, 16, 16
; 38 + (10 pad) + 38 + (10 pad) + 38 ( = 134)
; HSA-LABEL: {{^}}test_round_size_3_order2:
; HSA: workgroup_group_segment_byte_size = 134
; HSA: group_segment_alignment = 4
; HSA: .amdhsa_group_segment_fixed_size 134
define amdgpu_kernel void @test_round_size_3_order2(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
call void @llvm.memcpy.p3.p1.i32(ptr addrspace(3) align 8 @lds.align16.0, ptr addrspace(1) align 8 %in, i32 38, i1 false)
call void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) align 8 %out, ptr addrspace(3) align 8 @lds.align16.0, i32 38, i1 false)
@ -173,8 +163,7 @@ define amdgpu_kernel void @test_round_size_3_order2(ptr addrspace(1) %out, ptr a
; align 32, 16, 16
; 38 + (10 pad) + 38 + (10 pad) + 38 ( = 134)
; HSA-LABEL: {{^}}test_round_size_3_order3:
; HSA: workgroup_group_segment_byte_size = 134
; HSA: group_segment_alignment = 4
; HSA: .amdhsa_group_segment_fixed_size 134
define amdgpu_kernel void @test_round_size_3_order3(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
call void @llvm.memcpy.p3.p1.i32(ptr addrspace(3) align 8 @lds.align16.0, ptr addrspace(1) align 8 %in, i32 38, i1 false)
call void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) align 8 %out, ptr addrspace(3) align 8 @lds.align16.0, i32 38, i1 false)
@ -191,8 +180,7 @@ define amdgpu_kernel void @test_round_size_3_order3(ptr addrspace(1) %out, ptr a
; align 32, 16, 16
; 38 + (10 pad) + 38 + (10 pad) + 38 (= 134)
; HSA-LABEL: {{^}}test_round_size_3_order4:
; HSA: workgroup_group_segment_byte_size = 134
; HSA: group_segment_alignment = 4
; HSA: .amdhsa_group_segment_fixed_size 134
define amdgpu_kernel void @test_round_size_3_order4(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
call void @llvm.memcpy.p3.p1.i32(ptr addrspace(3) align 8 @lds.align8.0, ptr addrspace(1) align 8 %in, i32 38, i1 false)
call void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) align 8 %out, ptr addrspace(3) align 8 @lds.align8.0, i32 38, i1 false)
@ -209,8 +197,7 @@ define amdgpu_kernel void @test_round_size_3_order4(ptr addrspace(1) %out, ptr a
; align 32, 16, 16
; 38 + (10 pad) + 38 + (10 pad) + 38 (= 134)
; HSA-LABEL: {{^}}test_round_size_3_order5:
; HSA: workgroup_group_segment_byte_size = 134
; HSA: group_segment_alignment = 4
; HSA: .amdhsa_group_segment_fixed_size 134
define amdgpu_kernel void @test_round_size_3_order5(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
call void @llvm.memcpy.p3.p1.i32(ptr addrspace(3) align 8 @lds.align8.0, ptr addrspace(1) align 8 %in, i32 38, i1 false)
call void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) align 8 %out, ptr addrspace(3) align 8 @lds.align8.0, i32 38, i1 false)
@ -229,4 +216,4 @@ attributes #1 = { nounwind }
attributes #2 = { convergent nounwind }
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 200}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}

View File

@ -12,8 +12,8 @@
; EG-NEXT: .long 1
; ALL: {{^}}test:
; HSA: granulated_lds_size = 0
; HSA: workgroup_group_segment_byte_size = 4
; HSA-NOT: COMPUTE_PGM_RSRC2.LDS_SIZE
; HSA: .amdhsa_group_segment_fixed_size 4
; GCN: ; LDSByteSize: 4 bytes/workgroup (compile time only)
@lds = internal unnamed_addr addrspace(3) global i32 undef, align 4
@ -36,4 +36,4 @@ endif:
}
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 200}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}

View File

@ -3,12 +3,11 @@
declare i64 @llvm.amdgcn.dispatch.id() #1
; GCN-LABEL: {{^}}dispatch_id:
; GCN: .amd_kernel_code_t
; GCN: enable_sgpr_dispatch_id = 1
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], s6
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], s7
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[LO]]:[[HI]]]
; GCN: .amdhsa_user_sgpr_dispatch_id 1
define amdgpu_kernel void @dispatch_id(ptr addrspace(1) %out) #0 {
%tmp0 = call i64 @llvm.amdgcn.dispatch.id()
store i64 %tmp0, ptr addrspace(1) %out
@ -19,4 +18,4 @@ attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 200}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}

View File

@ -4,8 +4,8 @@
; ERROR: in function test{{.*}}: unsupported hsa intrinsic without hsa target
; GCN-LABEL: {{^}}test:
; GCN: enable_sgpr_dispatch_ptr = 1
; GCN: s_load_dword s{{[0-9]+}}, s[4:5], 0x0
; GCN: .amdhsa_user_sgpr_dispatch_ptr 1
define amdgpu_kernel void @test(ptr addrspace(1) %out) {
%dispatch_ptr = call noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0
%value = load i32, ptr addrspace(4) %dispatch_ptr
@ -14,11 +14,11 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out) {
}
; GCN-LABEL: {{^}}test2
; GCN: enable_sgpr_dispatch_ptr = 1
; GCN: s_load_dword s[[REG:[0-9]+]], s[4:5], 0x1
; GCN: s_lshr_b32 s{{[0-9]+}}, s[[REG]], 16
; GCN-NOT: load_ushort
; GCN: s_endpgm
; GCN: .amdhsa_user_sgpr_dispatch_ptr 1
define amdgpu_kernel void @test2(ptr addrspace(1) %out) {
%dispatch_ptr = call noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0
%d1 = getelementptr inbounds i8, ptr addrspace(4) %dispatch_ptr, i64 6
@ -33,4 +33,4 @@ declare noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0
attributes #0 = { readnone }
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 200}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}

View File

@ -1,11 +1,8 @@
; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck -check-prefixes=GCN,HSA %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck -check-prefixes=GCN,COV5 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck -check-prefixes=GCN,HSA,COV5 %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck -check-prefixes=GCN,HSA,COV4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -verify-machineinstrs | FileCheck -check-prefixes=GCN,MESA %s
; GCN-LABEL: {{^}}kernel_implicitarg_ptr_empty:
; HSA: enable_sgpr_kernarg_segment_ptr = 1
; HSA: kernarg_segment_byte_size = 56
; HSA: kernarg_segment_alignment = 4
; MESA: enable_sgpr_kernarg_segment_ptr = 1
; MESA: kernarg_segment_byte_size = 16
@ -13,6 +10,7 @@
; HSA: s_load_dword s0, s[4:5], 0x0
; COV4: .amdhsa_kernarg_size 56
; COV5: .amdhsa_kernarg_size 256
define amdgpu_kernel void @kernel_implicitarg_ptr_empty() #0 {
%implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
@ -21,10 +19,6 @@ define amdgpu_kernel void @kernel_implicitarg_ptr_empty() #0 {
}
; GCN-LABEL: {{^}}kernel_implicitarg_ptr_empty_0implicit:
; HSA: enable_sgpr_kernarg_segment_ptr = 0
; HSA: kernarg_segment_byte_size = 0
; HSA: kernarg_segment_alignment = 4
; MESA: enable_sgpr_kernarg_segment_ptr = 1
; MESA: kernarg_segment_byte_size = 16
; MESA: kernarg_segment_alignment = 4
@ -34,6 +28,7 @@ define amdgpu_kernel void @kernel_implicitarg_ptr_empty() #0 {
; MESA: s_load_dword s0, s[4:5], 0x0
; COV4: .amdhsa_kernarg_size 0
; COV5: .amdhsa_kernarg_size 0
define amdgpu_kernel void @kernel_implicitarg_ptr_empty_0implicit() #3 {
%implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
@ -43,17 +38,13 @@ define amdgpu_kernel void @kernel_implicitarg_ptr_empty_0implicit() #3 {
; GCN-LABEL: {{^}}opencl_kernel_implicitarg_ptr_empty:
; HSA: enable_sgpr_kernarg_segment_ptr = 1
; HSA: kernarg_segment_byte_size = 48
; HSA: kernarg_segment_alignment = 4
; MESA: enable_sgpr_kernarg_segment_ptr = 1
; MESA: kernarg_segment_byte_size = 16
; MESA: kernarg_segment_alignment = 4
; HSA: s_load_dword s0, s[4:5], 0x0
; COV5: .amdhsa_kernarg_size 48
; HSA: .amdhsa_kernarg_size 48
define amdgpu_kernel void @opencl_kernel_implicitarg_ptr_empty() #1 {
%implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%load = load volatile i32, ptr addrspace(4) %implicitarg.ptr
@ -62,16 +53,13 @@ define amdgpu_kernel void @opencl_kernel_implicitarg_ptr_empty() #1 {
; GCN-LABEL: {{^}}kernel_implicitarg_ptr:
; HSA: enable_sgpr_kernarg_segment_ptr = 1
; HSA: kernarg_segment_byte_size = 168
; HSA: kernarg_segment_alignment = 4
; MESA: enable_sgpr_kernarg_segment_ptr = 1
; MESA: kernarg_segment_byte_size = 128
; MESA: kernarg_segment_alignment = 4
; HSA: s_load_dword s0, s[4:5], 0x1c
; COV4: .amdhsa_kernarg_size 168
; COV5: .amdhsa_kernarg_size 368
define amdgpu_kernel void @kernel_implicitarg_ptr([112 x i8]) #0 {
%implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
@ -81,17 +69,13 @@ define amdgpu_kernel void @kernel_implicitarg_ptr([112 x i8]) #0 {
; GCN-LABEL: {{^}}opencl_kernel_implicitarg_ptr:
; HSA: enable_sgpr_kernarg_segment_ptr = 1
; HSA: kernarg_segment_byte_size = 160
; HSA: kernarg_segment_alignment = 4
; MESA: enable_sgpr_kernarg_segment_ptr = 1
; MESA: kernarg_segment_byte_size = 128
; MESA: kernarg_segment_alignment = 4
; HSA: s_load_dword s0, s[4:5], 0x1c
; COV5: .amdhsa_kernarg_size 160
; HSA: .amdhsa_kernarg_size 160
define amdgpu_kernel void @opencl_kernel_implicitarg_ptr([112 x i8]) #1 {
%implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%load = load volatile i32, ptr addrspace(4) %implicitarg.ptr
@ -121,9 +105,6 @@ define void @opencl_func_implicitarg_ptr() #0 {
}
; GCN-LABEL: {{^}}kernel_call_implicitarg_ptr_func_empty:
; HSA: enable_sgpr_kernarg_segment_ptr = 1
; HSA: kernarg_segment_byte_size = 56
; HSA: kernarg_segment_alignment = 4
; MESA: enable_sgpr_kernarg_segment_ptr = 1
; MESA: kernarg_segment_byte_size = 16
@ -132,6 +113,7 @@ define void @opencl_func_implicitarg_ptr() #0 {
; GCN: s_mov_b64 s[8:9], s[4:5]
; GCN: s_swappc_b64
; COV4: .amdhsa_kernarg_size 56
; COV5: .amdhsa_kernarg_size 256
define amdgpu_kernel void @kernel_call_implicitarg_ptr_func_empty() #0 {
call void @func_implicitarg_ptr()
@ -139,10 +121,6 @@ define amdgpu_kernel void @kernel_call_implicitarg_ptr_func_empty() #0 {
}
; GCN-LABEL: {{^}}kernel_call_implicitarg_ptr_func_empty_implicit0:
; HSA: enable_sgpr_kernarg_segment_ptr = 0
; HSA: kernarg_segment_byte_size = 0
; HSA: kernarg_segment_alignment = 4
; MESA: enable_sgpr_kernarg_segment_ptr = 1
; MESA: kernarg_segment_byte_size = 16
; MESA: kernarg_segment_alignment = 4
@ -151,16 +129,13 @@ define amdgpu_kernel void @kernel_call_implicitarg_ptr_func_empty() #0 {
; MESA: s_mov_b64 s[8:9], s[4:5]{{$}}
; GCN: s_swappc_b64
; COV5: .amdhsa_kernarg_size 0
; HSA: .amdhsa_kernarg_size 0
; Kernel with no explicit arguments that only calls @func_implicitarg_ptr and
; returns. The checks preceding this definition expect a kernarg size of 0.
; NOTE(review): attribute group #3 is defined outside this excerpt; judging by
; the kernel name it presumably requests zero implicit-argument bytes - confirm.
define amdgpu_kernel void @kernel_call_implicitarg_ptr_func_empty_implicit0() #3 {
call void @func_implicitarg_ptr()
ret void
}
; GCN-LABEL: {{^}}opencl_kernel_call_implicitarg_ptr_func_empty:
; HSA: enable_sgpr_kernarg_segment_ptr = 1
; HSA: kernarg_segment_byte_size = 48
; HSA: kernarg_segment_alignment = 4
; MESA: enable_sgpr_kernarg_segment_ptr = 1
; MESA: kernarg_segment_byte_size = 16
; GCN: s_mov_b64 s[8:9], s[4:5]
@ -168,17 +143,13 @@ define amdgpu_kernel void @kernel_call_implicitarg_ptr_func_empty_implicit0() #3
; GCN-NOT: s5
; GCN: s_swappc_b64
; COV5: .amdhsa_kernarg_size 48
; HSA: .amdhsa_kernarg_size 48
; Kernel with no explicit arguments that only calls @func_implicitarg_ptr and
; returns. The checks preceding this definition expect a kernarg size of 48.
; NOTE(review): attribute group #1 is defined outside this excerpt; the
; "opencl_" prefix suggests it selects the OpenCL-sized implicit argument
; block - confirm against the attribute definitions.
define amdgpu_kernel void @opencl_kernel_call_implicitarg_ptr_func_empty() #1 {
call void @func_implicitarg_ptr()
ret void
}
; GCN-LABEL: {{^}}kernel_call_implicitarg_ptr_func:
; HSA: enable_sgpr_kernarg_segment_ptr = 1
; HSA: kernarg_segment_byte_size = 168
; HSA: kernarg_segment_alignment = 4
; MESA: enable_sgpr_kernarg_segment_ptr = 1
; MESA: kernarg_segment_byte_size = 128
; MESA: kernarg_segment_alignment = 4
@ -189,6 +160,7 @@ define amdgpu_kernel void @opencl_kernel_call_implicitarg_ptr_func_empty() #1 {
; GCN: s_addc_u32 s9, s5, 0{{$}}
; GCN: s_swappc_b64
; COV4: .amdhsa_kernarg_size 168
; COV5: .amdhsa_kernarg_size 368
define amdgpu_kernel void @kernel_call_implicitarg_ptr_func([112 x i8]) #0 {
call void @func_implicitarg_ptr()
@ -196,9 +168,6 @@ define amdgpu_kernel void @kernel_call_implicitarg_ptr_func([112 x i8]) #0 {
}
; GCN-LABEL: {{^}}opencl_kernel_call_implicitarg_ptr_func:
; HSA: enable_sgpr_kernarg_segment_ptr = 1
; HSA: kernarg_segment_byte_size = 160
; HSA: kernarg_segment_alignment = 4
; MESA: enable_sgpr_kernarg_segment_ptr = 1
; MESA: kernarg_segment_byte_size = 128
; MESA: kernarg_segment_alignment = 4
@ -207,7 +176,7 @@ define amdgpu_kernel void @kernel_call_implicitarg_ptr_func([112 x i8]) #0 {
; GCN: s_addc_u32 s9, s5, 0{{$}}
; GCN: s_swappc_b64
; COV5: .amdhsa_kernarg_size 160
; HSA: .amdhsa_kernarg_size 160
define amdgpu_kernel void @opencl_kernel_call_implicitarg_ptr_func([112 x i8]) #1 {
call void @func_implicitarg_ptr()
ret void
@ -273,116 +242,69 @@ define amdgpu_kernel void @kernel_call_kernarg_implicitarg_ptr_func([112 x i8])
}
; GCN-LABEL: {{^}}kernel_implicitarg_no_struct_align_padding:
; HSA: kernarg_segment_byte_size = 120
; HSA: kernarg_segment_alignment = 6
; MESA: kernarg_segment_byte_size = 84
; MESA: kernarg_segment_alignment = 6
; COV5: .amdhsa_kernarg_size 120
; HSA: .amdhsa_kernarg_size 120
; Kernel taking two unnamed explicit arguments (<16 x i32> and i32) that reads
; one i32 through the implicit-argument pointer and returns. The checks
; preceding this definition expect a kernarg size of 120.
; NOTE(review): the explicit arguments occupy 64 + 4 bytes and 120 is below
; the next multiple of 64 (128), which is presumably what "no struct align
; padding" in the name refers to - confirm.
define amdgpu_kernel void @kernel_implicitarg_no_struct_align_padding(<16 x i32>, i32) #1 {
; Obtain the address-space-4 pointer to the implicit arguments.
%implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; The loaded value is unused; the load is marked volatile, presumably so that
; it is not optimized away.
%load = load volatile i32, ptr addrspace(4) %implicitarg.ptr
ret void
}
; HSA-LABEL: Kernels:
; HSA-LABEL: - Name: kernel_implicitarg_ptr_empty
; HSA: CodeProps:
; HSA: KernargSegmentSize: 56
; HSA: KernargSegmentAlign: 8
; HSA-LABEL: - Name: kernel_implicitarg_ptr_empty_0implicit
; HSA: KernargSegmentSize: 0
; HSA: KernargSegmentAlign: 4
; HSA-LABEL: - Name: opencl_kernel_implicitarg_ptr_empty
; HSA: KernargSegmentSize: 48
; HSA: KernargSegmentAlign: 8
; HSA-LABEL: - Name: kernel_implicitarg_ptr
; HSA: KernargSegmentSize: 168
; HSA: KernargSegmentAlign: 8
; HSA-LABEL: - Name: opencl_kernel_implicitarg_ptr
; HSA: KernargSegmentSize: 160
; HSA: KernargSegmentAlign: 8
; HSA-LABEL: - Name: kernel_call_implicitarg_ptr_func_empty
; HSA: KernargSegmentSize: 56
; HSA: KernargSegmentAlign: 8
; HSA-LABEL: - Name: kernel_call_implicitarg_ptr_func_empty_implicit0
; HSA: KernargSegmentSize: 0
; HSA: KernargSegmentAlign: 4
; HSA-LABEL: - Name: opencl_kernel_call_implicitarg_ptr_func_empty
; HSA: KernargSegmentSize: 48
; HSA: KernargSegmentAlign: 8
; HSA-LABEL: - Name: kernel_call_implicitarg_ptr_func
; HSA: KernargSegmentSize: 168
; HSA: KernargSegmentAlign: 8
; HSA-LABEL: - Name: opencl_kernel_call_implicitarg_ptr_func
; HSA: KernargSegmentSize: 160
; HSA: KernargSegmentAlign: 8
; HSA-LABEL: - Name: kernel_call_kernarg_implicitarg_ptr_func
; HSA: KernargSegmentSize: 168
; HSA: KernargSegmentAlign: 8
; HSA-LABEL: - Name: kernel_implicitarg_no_struct_align_padding
; HSA: KernargSegmentSize: 120
; HSA: KernargSegmentAlign: 64
; COV5-LABEL: amdhsa.kernels:
; COV5: .kernarg_segment_align: 8
; HSA-LABEL: amdhsa.kernels:
; HSA: .kernarg_segment_align: 8
; COV5-NEXT: .kernarg_segment_size: 256
; COV5-LABEL: .name: kernel_implicitarg_ptr_empty
; COV4-NEXT: .kernarg_segment_size: 56
; HSA-LABEL: .name: kernel_implicitarg_ptr_empty
; COV5: .kernarg_segment_align: 4
; COV5-NEXT: .kernarg_segment_size: 0
; COV5-LABEL: .name: kernel_implicitarg_ptr_empty_0implicit
; HSA: .kernarg_segment_align: 4
; HSA-NEXT: .kernarg_segment_size: 0
; HSA-LABEL: .name: kernel_implicitarg_ptr_empty_0implicit
; COV5: .kernarg_segment_align: 8
; COV5-NEXT: .kernarg_segment_size: 48
; COV5-LABEL: .name: opencl_kernel_implicitarg_ptr_empty
; HSA: .kernarg_segment_align: 8
; HSA-NEXT: .kernarg_segment_size: 48
; HSA-LABEL: .name: opencl_kernel_implicitarg_ptr_empty
; COV5: .kernarg_segment_align: 8
; HSA: .kernarg_segment_align: 8
; COV5-NEXT: .kernarg_segment_size: 368
; COV5-LABEL: .name: kernel_implicitarg_ptr
; COV4-NEXT: .kernarg_segment_size: 168
; HSA-LABEL: .name: kernel_implicitarg_ptr
; COV5: .kernarg_segment_align: 8
; COV5-NEXT: .kernarg_segment_size: 160
; COV5-LABEL: .name: opencl_kernel_implicitarg_ptr
; HSA: .kernarg_segment_align: 8
; HSA-NEXT: .kernarg_segment_size: 160
; HSA-LABEL: .name: opencl_kernel_implicitarg_ptr
; COV5: .kernarg_segment_align: 8
; HSA: .kernarg_segment_align: 8
; COV5-NEXT: .kernarg_segment_size: 256
; COV5-LABEL: .name: kernel_call_implicitarg_ptr_func_empty
; COV4-NEXT: .kernarg_segment_size: 56
; HSA-LABEL: .name: kernel_call_implicitarg_ptr_func_empty
; COV5: .kernarg_segment_align: 4
; COV5-NEXT: .kernarg_segment_size: 0
; COV5-LABEL: .name: kernel_call_implicitarg_ptr_func_empty_implicit0
; HSA: .kernarg_segment_align: 4
; HSA-NEXT: .kernarg_segment_size: 0
; HSA-LABEL: .name: kernel_call_implicitarg_ptr_func_empty_implicit0
; COV5: .kernarg_segment_align: 8
; COV5-NEXT: .kernarg_segment_size: 48
; COV5-LABEL: .name: opencl_kernel_call_implicitarg_ptr_func_empty
; HSA: .kernarg_segment_align: 8
; HSA-NEXT: .kernarg_segment_size: 48
; HSA-LABEL: .name: opencl_kernel_call_implicitarg_ptr_func_empty
; COV5: .kernarg_segment_align: 8
; HSA: .kernarg_segment_align: 8
; COV5-NEXT: .kernarg_segment_size: 368
; COV5-LABEL: .name: kernel_call_implicitarg_ptr_func
; COV4-NEXT: .kernarg_segment_size: 168
; HSA-LABEL: .name: kernel_call_implicitarg_ptr_func
; COV5: .kernarg_segment_align: 8
; COV5-NEXT: .kernarg_segment_size: 160
; COV5-LABEL: .name: opencl_kernel_call_implicitarg_ptr_func
; HSA: .kernarg_segment_align: 8
; HSA-NEXT: .kernarg_segment_size: 160
; HSA-LABEL: .name: opencl_kernel_call_implicitarg_ptr_func
; COV5: .kernarg_segment_align: 8
; HSA: .kernarg_segment_align: 8
; COV5-NEXT: .kernarg_segment_size: 368
; COV5-LABEL: .name: kernel_call_kernarg_implicitarg_ptr_func
; COV4-NEXT: .kernarg_segment_size: 168
; HSA-LABEL: .name: kernel_call_kernarg_implicitarg_ptr_func
; COV5: .kernarg_segment_align: 64
; COV5-NEXT: .kernarg_segment_size: 120
; COV5-LABEL: .name: kernel_implicitarg_no_struct_align_padding
; HSA: .kernarg_segment_align: 64
; HSA-NEXT: .kernarg_segment_size: 120
; HSA-LABEL: .name: kernel_implicitarg_no_struct_align_padding
; Declarations of the AMDGPU intrinsics referenced by this test. Both take no
; operands and return a pointer in address space 4. Attribute group #2 is
; defined outside this excerpt.
declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #2
declare ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() #2

View File

@ -1,15 +1,15 @@
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=CO-V2,HSA,ALL %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -verify-machineinstrs < %s | FileCheck -check-prefixes=CO-V2,OS-MESA3D,MESA,ALL %s
; RUN: llc -mtriple=amdgcn-mesa-unknown -verify-machineinstrs < %s | FileCheck -check-prefixes=OS-UNKNOWN,MESA,ALL %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=CO-V4,HSA,ALL %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -verify-machineinstrs < %s | FileCheck -check-prefixes=CO-V4,OS-MESA3D,ALL %s
; RUN: llc -mtriple=amdgcn-mesa-unknown -verify-machineinstrs < %s | FileCheck -check-prefixes=OS-UNKNOWN,ALL %s
; ALL-LABEL: {{^}}test:
; CO-V2: enable_sgpr_kernarg_segment_ptr = 1
; HSA: kernarg_segment_byte_size = 8
; HSA: kernarg_segment_alignment = 4
; CO-V2: s_load_dword s{{[0-9]+}}, s[4:5], 0xa
; OS-MESA3D: enable_sgpr_kernarg_segment_ptr = 1
; CO-V4: s_load_dword s{{[0-9]+}}, s[4:5], 0xa
; OS-UNKNOWN: s_load_dword s{{[0-9]+}}, s[0:1], 0xa
; HSA: .amdhsa_kernarg_size 8
; HSA: .amdhsa_user_sgpr_kernarg_segment_ptr 1
define amdgpu_kernel void @test(ptr addrspace(1) %out) #1 {
%kernarg.segment.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
%gep = getelementptr i32, ptr addrspace(4) %kernarg.segment.ptr, i64 10
@ -19,12 +19,12 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out) #1 {
}
; ALL-LABEL: {{^}}test_implicit:
; HSA: kernarg_segment_byte_size = 64
; OS-MESA3D: kernarg_segment_byte_size = 24
; CO-V2: kernarg_segment_alignment = 4
; OS-MESA3D: kernarg_segment_alignment = 4
; 10 + 9 (36 prepended implicit bytes) + 2(out pointer) = 21 = 0x15
; OS-UNKNOWN: s_load_dword s{{[0-9]+}}, s[0:1], 0x15
; HSA: .amdhsa_kernarg_size 8
define amdgpu_kernel void @test_implicit(ptr addrspace(1) %out) #1 {
%implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr i32, ptr addrspace(4) %implicitarg.ptr, i64 10
@ -34,10 +34,8 @@ define amdgpu_kernel void @test_implicit(ptr addrspace(1) %out) #1 {
}
; ALL-LABEL: {{^}}test_implicit_alignment:
; HSA: kernarg_segment_byte_size = 72
; OS-MESA3D: kernarg_segment_byte_size = 28
; CO-V2: kernarg_segment_alignment = 4
; OS-MESA3D: kernarg_segment_alignment = 4
; OS-UNKNOWN: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xc
; HSA: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x4
@ -45,6 +43,8 @@ define amdgpu_kernel void @test_implicit(ptr addrspace(1) %out) #1 {
; ALL: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[VAL]]
; MESA: buffer_store_dword [[V_VAL]]
; HSA: flat_store_dword v[{{[0-9]+:[0-9]+}}], [[V_VAL]]
; HSA: .amdhsa_kernarg_size 12
define amdgpu_kernel void @test_implicit_alignment(ptr addrspace(1) %out, <2 x i8> %in) #1 {
%implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%val = load i32, ptr addrspace(4) %implicitarg.ptr
@ -53,10 +53,8 @@ define amdgpu_kernel void @test_implicit_alignment(ptr addrspace(1) %out, <2 x i
}
; ALL-LABEL: {{^}}opencl_test_implicit_alignment
; HSA: kernarg_segment_byte_size = 64
; OS-MESA3D: kernarg_segment_byte_size = 28
; CO-V2: kernarg_segment_alignment = 4
; OS-MESA3D: kernarg_segment_alignment = 4
; OS-UNKNOWN: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xc
; HSA: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x4
@ -64,6 +62,7 @@ define amdgpu_kernel void @test_implicit_alignment(ptr addrspace(1) %out, <2 x i
; ALL: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[VAL]]
; MESA: buffer_store_dword [[V_VAL]]
; HSA: flat_store_dword v[{{[0-9]+:[0-9]+}}], [[V_VAL]]
; HSA: .amdhsa_kernarg_size 64
define amdgpu_kernel void @opencl_test_implicit_alignment(ptr addrspace(1) %out, <2 x i8> %in) #2 {
%implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%val = load i32, ptr addrspace(4) %implicitarg.ptr
@ -72,13 +71,13 @@ define amdgpu_kernel void @opencl_test_implicit_alignment(ptr addrspace(1) %out,
}
; ALL-LABEL: {{^}}test_no_kernargs:
; CO-V2: enable_sgpr_kernarg_segment_ptr = 0
; CO-V2: kernarg_segment_byte_size = 0
; OS-MESA3D: enable_sgpr_kernarg_segment_ptr = 0
; OS-MESA3D: kernarg_segment_byte_size = 0
; OS-MESA3D: kernarg_segment_alignment = 4
; CO-V2: kernarg_segment_alignment = 4
; HSA: s_mov_b64 [[NULL:s\[[0-9]+:[0-9]+\]]], 0{{$}}
; HSA: s_load_dword s{{[0-9]+}}, [[NULL]], 0xa{{$}}
; HSA: .amdhsa_kernarg_size 0
; HSA: .amdhsa_user_sgpr_kernarg_segment_ptr 0
define amdgpu_kernel void @test_no_kernargs() #1 {
%kernarg.segment.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
%gep = getelementptr i32, ptr addrspace(4) %kernarg.segment.ptr, i64 10
@ -88,9 +87,9 @@ define amdgpu_kernel void @test_no_kernargs() #1 {
}
; ALL-LABEL: {{^}}opencl_test_implicit_alignment_no_explicit_kernargs:
; HSA: kernarg_segment_byte_size = 48
; OS-MESA3d: kernarg_segment_byte_size = 16
; CO-V2: kernarg_segment_alignment = 4
; OS-MESA3D: kernarg_segment_byte_size = 16
; OS-MESA3D: kernarg_segment_alignment = 4
; HSA: .amdhsa_kernarg_size 48
define amdgpu_kernel void @opencl_test_implicit_alignment_no_explicit_kernargs() #2 {
%implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%val = load volatile i32, ptr addrspace(4) %implicitarg.ptr
@ -99,9 +98,9 @@ define amdgpu_kernel void @opencl_test_implicit_alignment_no_explicit_kernargs()
}
; ALL-LABEL: {{^}}opencl_test_implicit_alignment_no_explicit_kernargs_round_up:
; HSA: kernarg_segment_byte_size = 40
; OS-MESA3D: kernarg_segment_byte_size = 16
; CO-V2: kernarg_segment_alignment = 4
; OS-MESA3D: kernarg_segment_alignment = 4
; HSA: .amdhsa_kernarg_size 40
define amdgpu_kernel void @opencl_test_implicit_alignment_no_explicit_kernargs_round_up() #3 {
%implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%val = load volatile i32, ptr addrspace(4) %implicitarg.ptr
@ -109,13 +108,21 @@ define amdgpu_kernel void @opencl_test_implicit_alignment_no_explicit_kernargs_r
ret void
}
; ALL-LABEL: {{^}}func_kernarg_segment_ptr:
; ALL: v_mov_b32_e32 v0, 0{{$}}
; ALL: v_mov_b32_e32 v1, 0{{$}}
; Non-kernel (callable) function that simply returns the value of the
; llvm.amdgcn.kernarg.segment.ptr intrinsic. The checks that accompany this
; function expect both result registers (v0 and v1) to be materialized as 0.
define ptr addrspace(4) @func_kernarg_segment_ptr() {
%ptr = call ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
ret ptr addrspace(4) %ptr
}
declare ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() #0
declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #0
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }
attributes #1 = { nounwind "amdgpu-implicitarg-num-bytes"="0" }
attributes #2 = { nounwind "amdgpu-implicitarg-num-bytes"="48" }
attributes #3 = { nounwind "amdgpu-implicitarg-num-bytes"="38" }
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 200}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}

View File

@ -4,8 +4,8 @@
; ERROR: in function test{{.*}}: unsupported hsa intrinsic without hsa target
; GCN-LABEL: {{^}}test:
; GCN: enable_sgpr_queue_ptr = 1
; GCN: s_load_dword s{{[0-9]+}}, s[4:5], 0x0
; GCN: .amdhsa_user_sgpr_queue_ptr 1
define amdgpu_kernel void @test(ptr addrspace(1) %out) {
%queue_ptr = call noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr() #0
%value = load i32, ptr addrspace(4) %queue_ptr
@ -18,4 +18,4 @@ declare noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr() #0
attributes #0 = { nounwind readnone }
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 200}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}

View File

@ -1,9 +1,7 @@
; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck --check-prefixes=ALL,CO-V2 %s
; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn-unknown-amdhsa -mcpu=carrizo -verify-machineinstrs | FileCheck --check-prefixes=ALL,CO-V2 %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck --check-prefixes=ALL,UNKNOWN-OS %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefixes=ALL,UNKNOWN-OS %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-unknown-mesa3d -mcpu=tahiti -verify-machineinstrs | FileCheck -check-prefixes=ALL,CO-V2 %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefixes=ALL,CO-V2 %s
; RUN: llc -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,UNKNOWN-OS %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,UNKNOWN-OS %s
; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,MESA3D %s
; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,MESA3D %s
declare i32 @llvm.amdgcn.workgroup.id.x() #0
declare i32 @llvm.amdgcn.workgroup.id.y() #0
@ -11,25 +9,25 @@ declare i32 @llvm.amdgcn.workgroup.id.z() #0
; ALL-LABEL: {{^}}test_workgroup_id_x:
; CO-V2: .amd_kernel_code_t
; CO-V2: user_sgpr_count = 6
; CO-V2: enable_sgpr_workgroup_id_x = 1
; CO-V2: enable_sgpr_workgroup_id_y = 0
; CO-V2: enable_sgpr_workgroup_id_z = 0
; CO-V2: enable_sgpr_workgroup_info = 0
; CO-V2: enable_vgpr_workitem_id = 0
; CO-V2: enable_sgpr_grid_workgroup_count_x = 0
; CO-V2: enable_sgpr_grid_workgroup_count_y = 0
; CO-V2: enable_sgpr_grid_workgroup_count_z = 0
; CO-V2: .end_amd_kernel_code_t
; MESA3D: .amd_kernel_code_t
; MESA3D: user_sgpr_count = 6
; MESA3D: enable_sgpr_workgroup_id_x = 1
; MESA3D: enable_sgpr_workgroup_id_y = 0
; MESA3D: enable_sgpr_workgroup_id_z = 0
; MESA3D: enable_sgpr_workgroup_info = 0
; MESA3D: enable_vgpr_workitem_id = 0
; MESA3D: enable_sgpr_grid_workgroup_count_x = 0
; MESA3D: enable_sgpr_grid_workgroup_count_y = 0
; MESA3D: enable_sgpr_grid_workgroup_count_z = 0
; MESA3D: .end_amd_kernel_code_t
; UNKNOWN-OS: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s2{{$}}
; CO-V2: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s6{{$}}
; MESA3D: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s6{{$}}
; ALL: {{buffer|flat}}_store_dword {{.*}}[[VCOPY]]
; CO-V2: COMPUTE_PGM_RSRC2:USER_SGPR: 6
; ALL-NOCO-V2: COMPUTE_PGM_RSRC2:USER_SGPR: 2
; MESA3D: COMPUTE_PGM_RSRC2:USER_SGPR: 6
; ALL-NOMESA3D: COMPUTE_PGM_RSRC2:USER_SGPR: 2
; ALL: COMPUTE_PGM_RSRC2:TGID_X_EN: 1
; ALL: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
; ALL: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0
@ -41,22 +39,22 @@ define amdgpu_kernel void @test_workgroup_id_x(ptr addrspace(1) %out) #1 {
}
; ALL-LABEL: {{^}}test_workgroup_id_y:
; CO-V2: user_sgpr_count = 6
; CO-V2: enable_sgpr_workgroup_id_x = 1
; CO-V2: enable_sgpr_workgroup_id_y = 1
; CO-V2: enable_sgpr_workgroup_id_z = 0
; CO-V2: enable_sgpr_workgroup_info = 0
; CO-V2: enable_sgpr_grid_workgroup_count_x = 0
; CO-V2: enable_sgpr_grid_workgroup_count_y = 0
; CO-V2: enable_sgpr_grid_workgroup_count_z = 0
; MESA3D: user_sgpr_count = 6
; MESA3D: enable_sgpr_workgroup_id_x = 1
; MESA3D: enable_sgpr_workgroup_id_y = 1
; MESA3D: enable_sgpr_workgroup_id_z = 0
; MESA3D: enable_sgpr_workgroup_info = 0
; MESA3D: enable_sgpr_grid_workgroup_count_x = 0
; MESA3D: enable_sgpr_grid_workgroup_count_y = 0
; MESA3D: enable_sgpr_grid_workgroup_count_z = 0
; UNKNOWN-OS: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s3{{$}}
; HSA: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s7{{$}}
; ALL: {{buffer|flat}}_store_dword {{.*}}[[VCOPY]]
; CO-V2: COMPUTE_PGM_RSRC2:USER_SGPR: 6
; ALL-NOCO-V2: COMPUTE_PGM_RSRC2:USER_SGPR: 2
; MESA3D: COMPUTE_PGM_RSRC2:USER_SGPR: 6
; ALL-NOMESA3D: COMPUTE_PGM_RSRC2:USER_SGPR: 2
; ALL: COMPUTE_PGM_RSRC2:TGID_X_EN: 1
; ALL: COMPUTE_PGM_RSRC2:TGID_Y_EN: 1
; ALL: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0
@ -68,30 +66,30 @@ define amdgpu_kernel void @test_workgroup_id_y(ptr addrspace(1) %out) #1 {
}
; ALL-LABEL: {{^}}test_workgroup_id_z:
; CO-V2: user_sgpr_count = 6
; CO-V2: enable_sgpr_workgroup_id_x = 1
; CO-V2: enable_sgpr_workgroup_id_y = 0
; CO-V2: enable_sgpr_workgroup_id_z = 1
; CO-V2: enable_sgpr_workgroup_info = 0
; CO-V2: enable_vgpr_workitem_id = 0
; CO-V2: enable_sgpr_private_segment_buffer = 1
; CO-V2: enable_sgpr_dispatch_ptr = 0
; CO-V2: enable_sgpr_queue_ptr = 0
; CO-V2: enable_sgpr_kernarg_segment_ptr = 1
; CO-V2: enable_sgpr_dispatch_id = 0
; CO-V2: enable_sgpr_flat_scratch_init = 0
; CO-V2: enable_sgpr_private_segment_size = 0
; CO-V2: enable_sgpr_grid_workgroup_count_x = 0
; CO-V2: enable_sgpr_grid_workgroup_count_y = 0
; CO-V2: enable_sgpr_grid_workgroup_count_z = 0
; MESA3D: user_sgpr_count = 6
; MESA3D: enable_sgpr_workgroup_id_x = 1
; MESA3D: enable_sgpr_workgroup_id_y = 0
; MESA3D: enable_sgpr_workgroup_id_z = 1
; MESA3D: enable_sgpr_workgroup_info = 0
; MESA3D: enable_vgpr_workitem_id = 0
; MESA3D: enable_sgpr_private_segment_buffer = 1
; MESA3D: enable_sgpr_dispatch_ptr = 0
; MESA3D: enable_sgpr_queue_ptr = 0
; MESA3D: enable_sgpr_kernarg_segment_ptr = 1
; MESA3D: enable_sgpr_dispatch_id = 0
; MESA3D: enable_sgpr_flat_scratch_init = 0
; MESA3D: enable_sgpr_private_segment_size = 0
; MESA3D: enable_sgpr_grid_workgroup_count_x = 0
; MESA3D: enable_sgpr_grid_workgroup_count_y = 0
; MESA3D: enable_sgpr_grid_workgroup_count_z = 0
; UNKNOWN-OS: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s3{{$}}
; HSA: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s7{{$}}
; ALL: {{buffer|flat}}_store_dword {{.*}}[[VCOPY]]
; CO-V2: COMPUTE_PGM_RSRC2:USER_SGPR: 6
; ALL-NOCO-V2: COMPUTE_PGM_RSRC2:USER_SGPR: 2
; MESA3D: COMPUTE_PGM_RSRC2:USER_SGPR: 6
; ALL-NOMESA3D: COMPUTE_PGM_RSRC2:USER_SGPR: 2
; ALL: COMPUTE_PGM_RSRC2:TGID_X_EN: 1
; ALL: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
; ALL: COMPUTE_PGM_RSRC2:TGID_Z_EN: 1
@ -106,4 +104,4 @@ attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}

View File

@ -1,11 +1,9 @@
; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck --check-prefixes=ALL,CO-V2,UNPACKED %s
; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn-unknown-amdhsa -mcpu=carrizo -mattr=-flat-for-global -verify-machineinstrs | FileCheck --check-prefixes=ALL,CO-V2,UNPACKED %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-unknown-mesa3d -mcpu=hawaii -verify-machineinstrs | FileCheck -check-prefixes=ALL,CO-V2,UNPACKED %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck -check-prefixes=ALL,CO-V2,UNPACKED %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -verify-machineinstrs | FileCheck -check-prefixes=ALL,PACKED-TID %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 | FileCheck -check-prefixes=ALL,PACKED-TID %s
; RUN: llc -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s
; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,MESA3D,UNPACKED %s
; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,MESA3D,UNPACKED %s
; RUN: llc -mtriple=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,PACKED-TID %s
; RUN: llc -mtriple=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=ALL,PACKED-TID %s
declare i32 @llvm.amdgcn.workitem.id.x() #0
declare i32 @llvm.amdgcn.workitem.id.y() #0
@ -16,7 +14,7 @@ declare i32 @llvm.amdgcn.workitem.id.z() #0
; MESA-NEXT: .long 132{{$}}
; ALL-LABEL: {{^}}test_workitem_id_x:
; CO-V2: enable_vgpr_workitem_id = 0
; MESA3D: enable_vgpr_workitem_id = 0
; ALL-NOT: v0
; ALL: {{buffer|flat|global}}_store_{{dword|b32}} {{.*}}v0
@ -33,9 +31,9 @@ define amdgpu_kernel void @test_workitem_id_x(ptr addrspace(1) %out) #1 {
; MESA-NEXT: .long 2180{{$}}
; ALL-LABEL: {{^}}test_workitem_id_y:
; CO-V2: enable_vgpr_workitem_id = 1
; CO-V2-NOT: v1
; CO-V2: {{buffer|flat}}_store_dword {{.*}}v1
; MESA3D: enable_vgpr_workitem_id = 1
; MESA3D-NOT: v1
; MESA3D: {{buffer|flat}}_store_dword {{.*}}v1
; PACKED-TID: v_bfe_u32 [[ID:v[0-9]+]], v0, 10, 10
; PACKED-TID: {{buffer|flat|global}}_store_{{dword|b32}} {{.*}}[[ID]]
@ -51,9 +49,9 @@ define amdgpu_kernel void @test_workitem_id_y(ptr addrspace(1) %out) #1 {
; MESA-NEXT: .long 4228{{$}}
; ALL-LABEL: {{^}}test_workitem_id_z:
; CO-V2: enable_vgpr_workitem_id = 2
; CO-V2-NOT: v2
; CO-V2: {{buffer|flat}}_store_dword {{.*}}v2
; MESA3D: enable_vgpr_workitem_id = 2
; MESA3D-NOT: v2
; MESA3D: {{buffer|flat}}_store_dword {{.*}}v2
; PACKED-TID: v_bfe_u32 [[ID:v[0-9]+]], v0, 20, 10
; PACKED-TID: {{buffer|flat|global}}_store_{{dword|b32}} {{.*}}[[ID]]
@ -66,7 +64,7 @@ define amdgpu_kernel void @test_workitem_id_z(ptr addrspace(1) %out) #1 {
; FIXME: Packed tid should avoid the and
; ALL-LABEL: {{^}}test_reqd_workgroup_size_x_only:
; CO-V2: enable_vgpr_workitem_id = 0
; MESA3D: enable_vgpr_workitem_id = 0
; ALL-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; UNPACKED-DAG: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
@ -87,7 +85,7 @@ define amdgpu_kernel void @test_reqd_workgroup_size_x_only(ptr %out) !reqd_work_
}
; ALL-LABEL: {{^}}test_reqd_workgroup_size_y_only:
; CO-V2: enable_vgpr_workitem_id = 1
; MESA3D: enable_vgpr_workitem_id = 1
; ALL: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; ALL: flat_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]]
@ -109,7 +107,7 @@ define amdgpu_kernel void @test_reqd_workgroup_size_y_only(ptr %out) !reqd_work_
}
; ALL-LABEL: {{^}}test_reqd_workgroup_size_z_only:
; CO-V2: enable_vgpr_workitem_id = 2
; MESA3D: enable_vgpr_workitem_id = 2
; ALL: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; ALL: flat_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]]
@ -137,4 +135,4 @@ attributes #1 = { nounwind }
!0 = !{i32 64, i32 1, i32 1}
!1 = !{i32 1, i32 64, i32 1}
!2 = !{i32 1, i32 1, i32 64}
!3 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION}
!3 = !{i32 1, !"amdgpu_code_object_version", i32 400}

View File

@ -76,7 +76,7 @@ entry:
; CHECK-NEXT: s_nop 0
; CHECK-NEXT: s_nop 0
; CHECK-NEXT: s_nop 0
; CHECK-NEXT: s_nop 0 // 0000000001FC: BF800000
; CHECK-NEXT: s_nop 0 // 0000000000FC: BF800000
; CHECK-EMPTY:
; CHECK-NEXT: <kernel1>:
@ -87,4 +87,4 @@ entry:
}
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 200}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}

View File

@ -1,252 +0,0 @@
; RUN: llc -mtriple=amdgcn-unknown-amdhsa -mattr=-promote-alloca,+max-private-element-size-16 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap --check-prefixes=HSA-ELT16,ALL %s
; RUN: llc -mtriple=amdgcn-unknown-amdhsa -mattr=-promote-alloca,+max-private-element-size-8 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap --check-prefixes=HSA-ELT8,ALL,HSA-ELTGE8 %s
; RUN: llc -mtriple=amdgcn-unknown-amdhsa -mattr=-promote-alloca,+max-private-element-size-4 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap --check-prefixes=HSA-ELT4,ALL %s
; ALL-LABEL: {{^}}private_elt_size_v4i32:
; HSA-ELT16: private_element_size = 3
; HSA-ELT8: private_element_size = 2
; HSA-ELT4: private_element_size = 1
; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:16
; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:32
; HSA-ELT16-DAG: buffer_load_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], 0 offen{{$}}
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:24{{$}}
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:16
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:32
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:40
; HSA-ELT8: buffer_load_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], 0 offen
; HSA-ELT8: buffer_load_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], 0 offen
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:16{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:20{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:24{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:28{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:32{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:36{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:40{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:44{{$}}
; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen{{$}}
; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:4{{$}}
; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:8{{$}}
; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:12{{$}}
; Kernel: fills a two-element <4 x i32> array allocated in addrspace(5), then
; reloads one element chosen by a runtime index and stores it to %out.
define amdgpu_kernel void @private_elt_size_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %index.array) #0 {
entry:
; Fetch this work-item's index from %index.array[workitem.id.x].
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%idxprom = sext i32 %tid to i64
%gep.index = getelementptr inbounds i32, ptr addrspace(1) %index.array, i64 %idxprom
%index.load = load i32, ptr addrspace(1) %gep.index
; NOTE(review): masking with 2 yields 0 or 2, but the array below has only two
; elements (valid indices 0 and 1) - confirm whether a mask of 1 was intended.
%index = and i32 %index.load, 2
%alloca = alloca [2 x <4 x i32>], align 16, addrspace(5)
%gep1 = getelementptr inbounds [2 x <4 x i32>], ptr addrspace(5) %alloca, i32 0, i32 1
; Element 0 is zero-filled, element 1 gets the constant vector <1, 2, 3, 4>.
store <4 x i32> zeroinitializer, ptr addrspace(5) %alloca
store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, ptr addrspace(5) %gep1
; Dynamically indexed reload; the index is not a compile-time constant.
%gep2 = getelementptr inbounds [2 x <4 x i32>], ptr addrspace(5) %alloca, i32 0, i32 %index
%load = load <4 x i32>, ptr addrspace(5) %gep2
store <4 x i32> %load, ptr addrspace(1) %out
ret void
}
; ALL-LABEL: {{^}}private_elt_size_v8i32:
; HSA-ELT16: private_element_size = 3
; HSA-ELT8: private_element_size = 2
; HSA-ELT4: private_element_size = 1
; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:32
; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:48
; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:64
; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:80
; HSA-ELT16-DAG: buffer_load_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], 0 offen{{$}}
; HSA-ELT16-DAG: buffer_load_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], 0 offen{{$}}
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:32
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:40
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:48
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:56
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:88
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:80
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:72
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:64
; HSA-ELT8: buffer_load_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], 0 offen
; HSA-ELT8: buffer_load_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], 0 offen
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:32{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:36{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:40{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:44{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:48{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:52{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:56{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:60{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:64{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:68{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:72{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:76{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:80{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:84{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:88{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:92{{$}}
; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen{{$}}
; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:4{{$}}
; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:8{{$}}
; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:12{{$}}
; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:16{{$}}
; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:20{{$}}
; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:24{{$}}
; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:28{{$}}
; Kernel: fills a two-element <8 x i32> array allocated in addrspace(5), then
; reloads one element chosen by a runtime index and stores it to %out.
define amdgpu_kernel void @private_elt_size_v8i32(ptr addrspace(1) %out, ptr addrspace(1) %index.array) #0 {
entry:
; Fetch this work-item's index from %index.array[workitem.id.x].
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%idxprom = sext i32 %tid to i64
%gep.index = getelementptr inbounds i32, ptr addrspace(1) %index.array, i64 %idxprom
%index.load = load i32, ptr addrspace(1) %gep.index
; NOTE(review): masking with 2 yields 0 or 2, but the array below has only two
; elements (valid indices 0 and 1) - confirm whether a mask of 1 was intended.
%index = and i32 %index.load, 2
%alloca = alloca [2 x <8 x i32>], align 32, addrspace(5)
%gep1 = getelementptr inbounds [2 x <8 x i32>], ptr addrspace(5) %alloca, i32 0, i32 1
; Element 0 is zero-filled, element 1 gets the constant vector <1, ..., 8>.
store <8 x i32> zeroinitializer, ptr addrspace(5) %alloca
store <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>, ptr addrspace(5) %gep1
; Dynamically indexed reload; the index is not a compile-time constant.
%gep2 = getelementptr inbounds [2 x <8 x i32>], ptr addrspace(5) %alloca, i32 0, i32 %index
%load = load <8 x i32>, ptr addrspace(5) %gep2
store <8 x i32> %load, ptr addrspace(1) %out
ret void
}
; ALL-LABEL: {{^}}private_elt_size_i64:
; HSA-ELT16: private_element_size = 3
; HSA-ELT8: private_element_size = 2
; HSA-ELT4: private_element_size = 1
; HSA-ELTGE8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, {{off|v[0-9]}}, s[0:3], 0 offset:1
; HSA-ELTGE8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, {{off|v[0-9]}}, s[0:3], 0 offset:2
; HSA-ELTGE8-DAG: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, s[0:3], 0 offen
; HSA-ELTGE8: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, [[VAL]]
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:16{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:20{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:24{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:28{{$}}
; HSA-ELT4-DAG: buffer_load_dword v[[HI:[0-9]+]], v{{[0-9]+}}, s[0:3], 0 offen offset:4{{$}}
; HSA-ELT4-DAG: buffer_load_dword v[[LO:[0-9]+]], v{{[0-9]+}}, s[0:3], 0 offen{{$}}
; HSA-ELT4: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[[[LO]]:[[HI]]]
; Kernel: fills a two-element i64 array allocated in addrspace(5), then
; reloads one element chosen by a runtime index and stores it to %out.
define amdgpu_kernel void @private_elt_size_i64(ptr addrspace(1) %out, ptr addrspace(1) %index.array) #0 {
entry:
; Fetch this work-item's index from %index.array[workitem.id.x].
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%idxprom = sext i32 %tid to i64
%gep.index = getelementptr inbounds i32, ptr addrspace(1) %index.array, i64 %idxprom
%index.load = load i32, ptr addrspace(1) %gep.index
; NOTE(review): masking with 2 yields 0 or 2, but the array below has only two
; elements (valid indices 0 and 1) - confirm whether a mask of 1 was intended.
%index = and i32 %index.load, 2
%alloca = alloca [2 x i64], align 16, addrspace(5)
%gep1 = getelementptr inbounds [2 x i64], ptr addrspace(5) %alloca, i32 0, i32 1
; Element 0 is set to 0, element 1 to a 64-bit constant.
store i64 0, ptr addrspace(5) %alloca
store i64 34359738602, ptr addrspace(5) %gep1
; Dynamically indexed reload; the index is not a compile-time constant.
%gep2 = getelementptr inbounds [2 x i64], ptr addrspace(5) %alloca, i32 0, i32 %index
%load = load i64, ptr addrspace(5) %gep2
store i64 %load, ptr addrspace(1) %out
ret void
}
; ALL-LABEL: {{^}}private_elt_size_f64:
; HSA-ELT16: private_element_size = 3
; HSA-ELT8: private_element_size = 2
; HSA-ELT4: private_element_size = 1
; HSA-ELTGE8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:16
; HSA-ELTGE8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:24
; HSA-ELTGE8-DAG: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, s[0:3], 0 offen
; HSA-ELTGE8: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, [[VAL]]
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:16{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:20{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:24{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:28{{$}}
; HSA-ELT4-DAG: buffer_load_dword v[[HI:[0-9]+]], v{{[0-9]+}}, s[0:3], 0 offen offset:4{{$}}
; HSA-ELT4-DAG: buffer_load_dword v[[LO:[0-9]+]], v{{[0-9]+}}, s[0:3], 0 offen{{$}}
; HSA-ELT4: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[[[LO]]:[[HI]]]
; Kernel: fills a two-element double array allocated in addrspace(5), then
; reloads one element chosen by a runtime index and stores it to %out.
define amdgpu_kernel void @private_elt_size_f64(ptr addrspace(1) %out, ptr addrspace(1) %index.array) #0 {
entry:
; Fetch this work-item's index from %index.array[workitem.id.x].
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%idxprom = sext i32 %tid to i64
%gep.index = getelementptr inbounds i32, ptr addrspace(1) %index.array, i64 %idxprom
%index.load = load i32, ptr addrspace(1) %gep.index
; NOTE(review): masking with 2 yields 0 or 2, but the array below has only two
; elements (valid indices 0 and 1) - confirm whether a mask of 1 was intended.
%index = and i32 %index.load, 2
%alloca = alloca [2 x double], align 16, addrspace(5)
%gep1 = getelementptr inbounds [2 x double], ptr addrspace(5) %alloca, i32 0, i32 1
; Element 0 is set to 0.0, element 1 to 4.0.
store double 0.0, ptr addrspace(5) %alloca
store double 4.0, ptr addrspace(5) %gep1
; Dynamically indexed reload; the index is not a compile-time constant.
%gep2 = getelementptr inbounds [2 x double], ptr addrspace(5) %alloca, i32 0, i32 %index
%load = load double, ptr addrspace(5) %gep2
store double %load, ptr addrspace(1) %out
ret void
}
; ALL-LABEL: {{^}}private_elt_size_v2i64:
; HSA-ELT16: private_element_size = 3
; HSA-ELT8: private_element_size = 2
; HSA-ELT4: private_element_size = 1
; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:16
; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:32
; HSA-ELT16-DAG: buffer_load_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], 0 offen{{$}}
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:16{{$}}
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:24
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:40
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:32
; HSA-ELT8: buffer_load_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], 0 offen
; HSA-ELT8: buffer_load_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], 0 offen
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:16{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:20{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:24{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:28{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:32{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:36{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:40{{$}}
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:44{{$}}
; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:12{{$}}
; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:8{{$}}
; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:4{{$}}
; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen{{$}}
; Kernel: fills a two-element <2 x i64> array allocated in addrspace(5), then
; reloads one element chosen by a runtime index and stores it to %out.
define amdgpu_kernel void @private_elt_size_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %index.array) #0 {
entry:
; Fetch this work-item's index from %index.array[workitem.id.x].
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%idxprom = sext i32 %tid to i64
%gep.index = getelementptr inbounds i32, ptr addrspace(1) %index.array, i64 %idxprom
%index.load = load i32, ptr addrspace(1) %gep.index
; NOTE(review): masking with 2 yields 0 or 2, but the array below has only two
; elements (valid indices 0 and 1) - confirm whether a mask of 1 was intended.
%index = and i32 %index.load, 2
%alloca = alloca [2 x <2 x i64>], align 16, addrspace(5)
%gep1 = getelementptr inbounds [2 x <2 x i64>], ptr addrspace(5) %alloca, i32 0, i32 1
; Element 0 is zero-filled, element 1 gets the constant vector <1, 2>.
store <2 x i64> zeroinitializer, ptr addrspace(5) %alloca
store <2 x i64> <i64 1, i64 2>, ptr addrspace(5) %gep1
; Dynamically indexed reload; the index is not a compile-time constant.
%gep2 = getelementptr inbounds [2 x <2 x i64>], ptr addrspace(5) %alloca, i32 0, i32 %index
%load = load <2 x i64>, ptr addrspace(5) %gep2
store <2 x i64> %load, ptr addrspace(1) %out
ret void
}
; Intrinsic used by every kernel in this file to obtain the work-item id.
declare i32 @llvm.amdgcn.workitem.id.x() #1
; #0 is applied to the kernels, #1 to the intrinsic declaration.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
; Module flag selecting the AMDGPU code object version; the value is the
; version times 100, so 200 requests code object V2.
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 200}

View File

@ -2,7 +2,7 @@
; RUN: llc -O1 -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -mattr=+promote-alloca < %s | FileCheck -check-prefix=OPTS -check-prefix=ALL %s
; ALL-LABEL: {{^}}promote_alloca_i32_array_array:
; NOOPTS: workgroup_group_segment_byte_size = 0{{$}}
; NOOPTS: .amdhsa_group_segment_fixed_size 0
; NOOPTS-NOT: ds_write
; OPTS: ds_write
define amdgpu_kernel void @promote_alloca_i32_array_array(ptr addrspace(1) %out, i32 %index) #0 {
@ -18,7 +18,7 @@ entry:
}
; ALL-LABEL: {{^}}optnone_promote_alloca_i32_array_array:
; ALL: workgroup_group_segment_byte_size = 0{{$}}
; ALL: .amdhsa_group_segment_fixed_size 0
; ALL-NOT: ds_write
define amdgpu_kernel void @optnone_promote_alloca_i32_array_array(ptr addrspace(1) %out, i32 %index) #1 {
entry:
@ -36,4 +36,4 @@ attributes #0 = { nounwind "amdgpu-flat-work-group-size"="64,64" }
attributes #1 = { nounwind optnone noinline "amdgpu-flat-work-group-size"="64,64" }
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 200}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}

View File

@ -31,7 +31,7 @@
; GCN-LABEL: {{^}}promote_alloca_size_order_0:
; GCN: workgroup_group_segment_byte_size = 1060
; GCN: .amdhsa_group_segment_fixed_size 1060
define amdgpu_kernel void @promote_alloca_size_order_0(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in, i32 %idx) #0 {
entry:
%stack = alloca [5 x i32], align 4, addrspace(5)
@ -62,7 +62,7 @@ entry:
}
; GCN-LABEL: {{^}}promote_alloca_size_order_1:
; GCN: workgroup_group_segment_byte_size = 1072
; GCN: .amdhsa_group_segment_fixed_size 1072
define amdgpu_kernel void @promote_alloca_size_order_1(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in, i32 %idx) #0 {
entry:
%stack = alloca [5 x i32], align 4, addrspace(5)
@ -99,7 +99,7 @@ entry:
; size limit, so it isn't promoted
; GCN-LABEL: {{^}}promote_alloca_align_pad_guess_over_limit:
; GCN: workgroup_group_segment_byte_size = 1060
; GCN: .amdhsa_group_segment_fixed_size 1060
define amdgpu_kernel void @promote_alloca_align_pad_guess_over_limit(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in, i32 %idx) #0 {
entry:
%stack = alloca [5 x i32], align 4, addrspace(5)
@ -129,4 +129,4 @@ entry:
attributes #0 = { nounwind "amdgpu-flat-work-group-size"="64,64" "amdgpu-waves-per-eu"="1,7" }
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 200}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}

View File

@ -1,12 +0,0 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90c -mattr=-xnack < %s 2>&1 | FileCheck --check-prefix=GFX90C-VALID %s
; RUN: not --crash llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90c < %s 2>&1 | FileCheck --check-prefix=GFX90C-ERROR %s
; RUN: not --crash llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s 2>&1 | FileCheck --check-prefix=GFX940-ERROR %s
; GFX90C-VALID: .hsa_code_object_isa 9,0,12,"AMD","AMDGPU"
; GFX90C-VALID: .amd_amdgpu_isa "amdgcn-amd-amdhsa--gfx90c"
; GFX90C-ERROR: LLVM ERROR: AMD GPU code object V2 does not support processor gfx90c with XNACK being ON or ANY
; GFX940-ERROR: LLVM ERROR: AMD GPU code object V2 does not support processor gfx940
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 200}

File diff suppressed because it is too large Load Diff

View File

@ -38,12 +38,11 @@ declare void @llvm.debugtrap() #1
; NOMESA-TRAP-NEXT: .long 144
; GCN-LABEL: {{^}}hsa_trap:
; HSA-TRAP: enable_trap_handler = 0
; HSA-TRAP: s_mov_b64 s[0:1], s[4:5]
; HSA-TRAP: s_trap 2
; HSA-TRAP: COMPUTE_PGM_RSRC2:TRAP_HANDLER: 0
; For llvm.trap in the hsa path without ABI, directly generate an s_endpgm instruction without emitting any warning.
; NO-HSA-TRAP: enable_trap_handler = 0
; NO-HSA-TRAP: s_endpgm
; NO-HSA-TRAP: COMPUTE_PGM_RSRC2:TRAP_HANDLER: 0
@ -67,12 +66,11 @@ define amdgpu_kernel void @hsa_trap(ptr addrspace(1) nocapture readonly %arg0) {
; NOMESA-TRAP-NEXT: .long 140
; GCN-LABEL: {{^}}hsa_debugtrap:
; HSA-TRAP: enable_trap_handler = 0
; HSA-TRAP: s_trap 3
; HSA-TRAP: flat_store_dword v[0:1], v3
; HSA-TRAP: COMPUTE_PGM_RSRC2:TRAP_HANDLER: 0
; For llvm.debugtrap in the non-hsa path without ABI, generate a warning and an s_endpgm instruction.
; NO-HSA-TRAP: enable_trap_handler = 0
; NO-HSA-TRAP: s_endpgm
; TRAP-BIT: enable_trap_handler = 1
@ -148,4 +146,4 @@ attributes #0 = { nounwind noreturn }
attributes #1 = { nounwind }
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 200}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}

View File

@ -1,5 +1,5 @@
// RUN: llvm-mc -triple amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 -show-encoding %s | FileCheck %s --check-prefix=ASM
// RUN: llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 -show-encoding %s | llvm-readobj --symbols -S --sd - | FileCheck %s --check-prefix=ELF
// RUN: llvm-mc -triple amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=4 -show-encoding %s | FileCheck %s --check-prefix=ASM
// RUN: llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=4 -show-encoding %s | llvm-readobj --symbols -S --sd - | FileCheck %s --check-prefix=ELF
// ELF: Section {
// ELF: Name: .text
@ -8,121 +8,67 @@
// ELF: SHF_ALLOC (0x2)
// ELF: SHF_EXECINSTR (0x4)
// ELF: SHT_NOTE
// ELF: 0000: 04000000 08000000 01000000 414D4400
// ELF: 0010: 02000000 00000000 04000000 1B000000
// ELF: 0020: 03000000 414D4400 04000700 07000000
// ELF: 0030: 00000000 00000000 414D4400 414D4447
// ELF: 0040: 50550000
// ELF: Symbol {
// ELF: Name: amd_kernel_code_t_minimal
// ELF: Type: AMDGPU_HSA_KERNEL (0xA)
// ELF: Name: minimal
// ELF: Section: .text
// ELF: }
.text
// ASM: .text
.hsa_code_object_version 2,0
// ASM: .hsa_code_object_version 2,0
.hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
// ASM: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
.amdgpu_hsa_kernel amd_kernel_code_t_minimal
.amdgcn_target "amdgcn-unknown-amdhsa--gfx700"
// ASM: .amdgcn_target "amdgcn-unknown-amdhsa--gfx700"
.set my_is_ptr64, 1
.if my_is_ptr64 == 0
.set my_kernarg_segment_byte_size, 32
.set my_next_free_vgpr, 4
.else
.set my_kernarg_segment_byte_size, 16
.set my_next_free_vgpr, 8
.endif
.set my_sgpr, 8
.set my_sgpr, 6
minimal:
.amdhsa_kernel minimal
.amdhsa_next_free_vgpr 1+(my_next_free_vgpr-1)
// Make sure a blank line won't break anything:
amd_kernel_code_t_minimal:
.amd_kernel_code_t
amd_code_version_major = .option.machine_version_major
enable_sgpr_kernarg_segment_ptr = 1
is_ptr64 = my_is_ptr64
granulated_workitem_vgpr_count = 1
granulated_wavefront_sgpr_count = 1+(my_sgpr-1)/8
user_sgpr_count = 2
kernarg_segment_byte_size = my_kernarg_segment_byte_size
wavefront_sgpr_count = my_sgpr
// wavefront_sgpr_count = 7
; wavefront_sgpr_count = 7
// Make sure a blank line won't break anything:
.amdhsa_next_free_sgpr my_sgpr/2+3
.end_amdhsa_kernel
// Make sure a line with whitespace won't break anything:
workitem_vgpr_count = 16
.end_amd_kernel_code_t
// ASM-LABEL: {{^}}amd_kernel_code_t_minimal:
// ASM: .amd_kernel_code_t
// ASM: amd_code_version_major = 7
// ASM: amd_code_version_minor = 2
// ASM: amd_machine_kind = 1
// ASM: amd_machine_version_major = 7
// ASM: amd_machine_version_minor = 0
// ASM: amd_machine_version_stepping = 0
// ASM: kernel_code_entry_byte_offset = 256
// ASM: kernel_code_prefetch_byte_size = 0
// ASM: granulated_workitem_vgpr_count = 1
// ASM: granulated_wavefront_sgpr_count = 1
// ASM: priority = 0
// ASM: float_mode = 0
// ASM: priv = 0
// ASM: enable_dx10_clamp = 0
// ASM: debug_mode = 0
// ASM: enable_ieee_mode = 0
// ASM: enable_sgpr_private_segment_wave_byte_offset = 0
// ASM: user_sgpr_count = 2
// ASM: enable_sgpr_workgroup_id_x = 0
// ASM: enable_sgpr_workgroup_id_y = 0
// ASM: enable_sgpr_workgroup_id_z = 0
// ASM: enable_sgpr_workgroup_info = 0
// ASM: enable_vgpr_workitem_id = 0
// ASM: enable_exception_msb = 0
// ASM: granulated_lds_size = 0
// ASM: enable_exception = 0
// ASM: enable_sgpr_private_segment_buffer = 0
// ASM: enable_sgpr_dispatch_ptr = 0
// ASM: enable_sgpr_queue_ptr = 0
// ASM: enable_sgpr_kernarg_segment_ptr = 1
// ASM: enable_sgpr_dispatch_id = 0
// ASM: enable_sgpr_flat_scratch_init = 0
// ASM: enable_sgpr_private_segment_size = 0
// ASM: enable_sgpr_grid_workgroup_count_x = 0
// ASM: enable_sgpr_grid_workgroup_count_y = 0
// ASM: enable_sgpr_grid_workgroup_count_z = 0
// ASM: enable_ordered_append_gds = 0
// ASM: private_element_size = 0
// ASM: is_ptr64 = 1
// ASM: is_dynamic_callstack = 0
// ASM: is_debug_enabled = 0
// ASM: is_xnack_enabled = 0
// ASM: workitem_private_segment_byte_size = 0
// ASM: workgroup_group_segment_byte_size = 0
// ASM: gds_segment_byte_size = 0
// ASM: kernarg_segment_byte_size = 16
// ASM: workgroup_fbarrier_count = 0
// ASM: wavefront_sgpr_count = 8
// ASM: workitem_vgpr_count = 16
// ASM: reserved_vgpr_first = 0
// ASM: reserved_vgpr_count = 0
// ASM: reserved_sgpr_first = 0
// ASM: reserved_sgpr_count = 0
// ASM: debug_wavefront_private_segment_offset_sgpr = 0
// ASM: debug_private_segment_buffer_sgpr = 0
// ASM: kernarg_segment_alignment = 4
// ASM: group_segment_alignment = 4
// ASM: private_segment_alignment = 4
// ASM: wavefront_size = 6
// ASM: call_convention = -1
// ASM: runtime_loader_kernel_symbol = 0
// ASM: .end_amd_kernel_code_t
; ASM-LABEL: minimal:
; ASM: .amdhsa_kernel minimal
; ASM: .amdhsa_group_segment_fixed_size 0
; ASM: .amdhsa_private_segment_fixed_size 0
; ASM: .amdhsa_kernarg_size 0
; ASM: .amdhsa_user_sgpr_count 0
; ASM: .amdhsa_user_sgpr_private_segment_buffer 0
; ASM: .amdhsa_user_sgpr_dispatch_ptr 0
; ASM: .amdhsa_user_sgpr_queue_ptr 0
; ASM: .amdhsa_user_sgpr_kernarg_segment_ptr 0
; ASM: .amdhsa_user_sgpr_dispatch_id 0
; ASM: .amdhsa_user_sgpr_flat_scratch_init 0
; ASM: .amdhsa_user_sgpr_private_segment_size 0
; ASM: .amdhsa_system_sgpr_private_segment_wavefront_offset 0
; ASM: .amdhsa_system_sgpr_workgroup_id_x 1
; ASM: .amdhsa_system_sgpr_workgroup_id_y 0
; ASM: .amdhsa_system_sgpr_workgroup_id_z 0
; ASM: .amdhsa_system_sgpr_workgroup_info 0
; ASM: .amdhsa_system_vgpr_workitem_id 0
; ASM: .amdhsa_next_free_vgpr 8
; ASM: .amdhsa_next_free_sgpr 6
; ASM: .amdhsa_float_round_mode_32 0
; ASM: .amdhsa_float_round_mode_16_64 0
; ASM: .amdhsa_float_denorm_mode_32 0
; ASM: .amdhsa_float_denorm_mode_16_64 3
; ASM: .amdhsa_dx10_clamp 1
; ASM: .amdhsa_ieee_mode 1
; ASM: .amdhsa_exception_fp_ieee_invalid_op 0
; ASM: .amdhsa_exception_fp_denorm_src 0
; ASM: .amdhsa_exception_fp_ieee_div_zero 0
; ASM: .amdhsa_exception_fp_ieee_overflow 0
; ASM: .amdhsa_exception_fp_ieee_underflow 0
; ASM: .amdhsa_exception_fp_ieee_inexact 0
; ASM: .amdhsa_exception_int_div_zero 0
; ASM: .end_amdhsa_kernel

View File

@ -1,284 +0,0 @@
// RUN: llvm-mc -triple amdgcn--amdhsa -mcpu=gfx1010 --amdhsa-code-object-version=2 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s | FileCheck %s --check-prefix=ASM
// RUN: llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=gfx1010 --amdhsa-code-object-version=2 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s | llvm-readobj -S --sd --syms - | FileCheck %s --check-prefix=ELF
// ELF: Section {
// ELF: Name: .text
// ELF: Type: SHT_PROGBITS (0x1)
// ELF: Flags [ (0x6)
// ELF: SHF_ALLOC (0x2)
// ELF: SHF_EXECINSTR (0x4)
// ELF: SHT_NOTE
// ELF: 0000: 04000000 08000000 01000000 414D4400
// ELF: 0010: 02000000 00000000 04000000 1B000000
// ELF: 0020: 03000000 414D4400 04000700 07000000
// ELF: 0030: 00000000 00000000 414D4400 414D4447
// ELF: 0040: 50550000
// We can't check binary representation of metadata note: it is different on
// Windows and Linux because of carriage return on Windows
// ELF: Symbol {
// ELF: Name: amd_kernel_code_t_test_all
// ELF: Type: AMDGPU_HSA_KERNEL (0xA)
// ELF: Section: .text
// ELF: }
// ELF: Symbol {
// ELF: Name: amd_kernel_code_t_minimal
// ELF: Type: AMDGPU_HSA_KERNEL (0xA)
// ELF: Section: .text
// ELF: }
.text
// ASM: .text
.hsa_code_object_version 2,0
// ASM: .hsa_code_object_version 2,0
.hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
// ASM: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
.amd_amdgpu_hsa_metadata
Version: [ 3, 0 ]
Kernels:
- Name: amd_kernel_code_t_test_all
SymbolName: amd_kernel_code_t_test_all@kd
- Name: amd_kernel_code_t_minimal
SymbolName: amd_kernel_code_t_minimal@kd
.end_amd_amdgpu_hsa_metadata
// ASM: .amd_amdgpu_hsa_metadata
// ASM: Version: [ 3, 0 ]
// ASM: Kernels:
// ASM: - Name: amd_kernel_code_t_test_all
// ASM: SymbolName: 'amd_kernel_code_t_test_all@kd'
// ASM: - Name: amd_kernel_code_t_minimal
// ASM: SymbolName: 'amd_kernel_code_t_minimal@kd'
// ASM: .end_amd_amdgpu_hsa_metadata
.amdgpu_hsa_kernel amd_kernel_code_t_test_all
.amdgpu_hsa_kernel amd_kernel_code_t_minimal
amd_kernel_code_t_test_all:
; Test all amd_kernel_code_t members with non-default values.
.amd_kernel_code_t
kernel_code_version_major = 100
kernel_code_version_minor = 100
machine_kind = 0
machine_version_major = 5
machine_version_minor = 5
machine_version_stepping = 5
kernel_code_entry_byte_offset = 512
kernel_code_prefetch_byte_size = 1
max_scratch_backing_memory_byte_size = 1
compute_pgm_rsrc1_vgprs = 1
compute_pgm_rsrc1_sgprs = 1
compute_pgm_rsrc1_priority = 1
compute_pgm_rsrc1_float_mode = 1
compute_pgm_rsrc1_priv = 1
compute_pgm_rsrc1_dx10_clamp = 1
compute_pgm_rsrc1_debug_mode = 1
compute_pgm_rsrc1_ieee_mode = 1
compute_pgm_rsrc1_wgp_mode = 0
compute_pgm_rsrc1_mem_ordered = 0
compute_pgm_rsrc1_fwd_progress = 1
compute_pgm_rsrc2_scratch_en = 1
compute_pgm_rsrc2_user_sgpr = 1
compute_pgm_rsrc2_tgid_x_en = 1
compute_pgm_rsrc2_tgid_y_en = 1
compute_pgm_rsrc2_tgid_z_en = 1
compute_pgm_rsrc2_tg_size_en = 1
compute_pgm_rsrc2_tidig_comp_cnt = 1
compute_pgm_rsrc2_excp_en_msb = 1
compute_pgm_rsrc2_lds_size = 1
compute_pgm_rsrc2_excp_en = 1
enable_sgpr_private_segment_buffer = 1
enable_sgpr_dispatch_ptr = 1
enable_sgpr_queue_ptr = 1
enable_sgpr_kernarg_segment_ptr = 1
enable_sgpr_dispatch_id = 1
enable_sgpr_flat_scratch_init = 1
enable_sgpr_private_segment_size = 1
enable_sgpr_grid_workgroup_count_x = 1
enable_sgpr_grid_workgroup_count_y = 1
enable_sgpr_grid_workgroup_count_z = 1
enable_ordered_append_gds = 1
private_element_size = 1
is_ptr64 = 1
is_dynamic_callstack = 1
is_debug_enabled = 1
is_xnack_enabled = 1
workitem_private_segment_byte_size = 1
workgroup_group_segment_byte_size = 1
gds_segment_byte_size = 1
kernarg_segment_byte_size = 1
workgroup_fbarrier_count = 1
wavefront_sgpr_count = 1
workitem_vgpr_count = 1
reserved_vgpr_first = 1
reserved_vgpr_count = 1
reserved_sgpr_first = 1
reserved_sgpr_count = 1
debug_wavefront_private_segment_offset_sgpr = 1
debug_private_segment_buffer_sgpr = 1
kernarg_segment_alignment = 5
group_segment_alignment = 5
private_segment_alignment = 5
wavefront_size = 6
call_convention = 1
runtime_loader_kernel_symbol = 1
.end_amd_kernel_code_t
// ASM-LABEL: {{^}}amd_kernel_code_t_test_all:
// ASM: .amd_kernel_code_t
// ASM: amd_code_version_major = 100
// ASM: amd_code_version_minor = 100
// ASM: amd_machine_kind = 0
// ASM: amd_machine_version_major = 5
// ASM: amd_machine_version_minor = 5
// ASM: amd_machine_version_stepping = 5
// ASM: kernel_code_entry_byte_offset = 512
// ASM: kernel_code_prefetch_byte_size = 1
// ASM: granulated_workitem_vgpr_count = 1
// ASM: granulated_wavefront_sgpr_count = 1
// ASM: priority = 1
// ASM: float_mode = 1
// ASM: priv = 1
// ASM: enable_dx10_clamp = 1
// ASM: debug_mode = 1
// ASM: enable_ieee_mode = 1
// ASM: enable_wgp_mode = 0
// ASM: enable_mem_ordered = 0
// ASM: enable_fwd_progress = 1
// ASM: enable_sgpr_private_segment_wave_byte_offset = 1
// ASM: user_sgpr_count = 1
// ASM: enable_sgpr_workgroup_id_x = 1
// ASM: enable_sgpr_workgroup_id_y = 1
// ASM: enable_sgpr_workgroup_id_z = 1
// ASM: enable_sgpr_workgroup_info = 1
// ASM: enable_vgpr_workitem_id = 1
// ASM: enable_exception_msb = 1
// ASM: granulated_lds_size = 1
// ASM: enable_exception = 1
// ASM: enable_sgpr_private_segment_buffer = 1
// ASM: enable_sgpr_dispatch_ptr = 1
// ASM: enable_sgpr_queue_ptr = 1
// ASM: enable_sgpr_kernarg_segment_ptr = 1
// ASM: enable_sgpr_dispatch_id = 1
// ASM: enable_sgpr_flat_scratch_init = 1
// ASM: enable_sgpr_private_segment_size = 1
// ASM: enable_sgpr_grid_workgroup_count_x = 1
// ASM: enable_sgpr_grid_workgroup_count_y = 1
// ASM: enable_sgpr_grid_workgroup_count_z = 1
// ASM: enable_ordered_append_gds = 1
// ASM: private_element_size = 1
// ASM: is_ptr64 = 1
// ASM: is_dynamic_callstack = 1
// ASM: is_debug_enabled = 1
// ASM: is_xnack_enabled = 1
// ASM: workitem_private_segment_byte_size = 1
// ASM: workgroup_group_segment_byte_size = 1
// ASM: gds_segment_byte_size = 1
// ASM: kernarg_segment_byte_size = 1
// ASM: workgroup_fbarrier_count = 1
// ASM: wavefront_sgpr_count = 1
// ASM: workitem_vgpr_count = 1
// ASM: reserved_vgpr_first = 1
// ASM: reserved_vgpr_count = 1
// ASM: reserved_sgpr_first = 1
// ASM: reserved_sgpr_count = 1
// ASM: debug_wavefront_private_segment_offset_sgpr = 1
// ASM: debug_private_segment_buffer_sgpr = 1
// ASM: kernarg_segment_alignment = 5
// ASM: group_segment_alignment = 5
// ASM: private_segment_alignment = 5
// ASM: wavefront_size = 6
// ASM: call_convention = 1
// ASM: runtime_loader_kernel_symbol = 1
// ASM: .end_amd_kernel_code_t
amd_kernel_code_t_minimal:
.amd_kernel_code_t
enable_sgpr_kernarg_segment_ptr = 1
is_ptr64 = 1
granulated_workitem_vgpr_count = 1
granulated_wavefront_sgpr_count = 1
user_sgpr_count = 2
kernarg_segment_byte_size = 16
wavefront_sgpr_count = 8
// wavefront_sgpr_count = 7
; wavefront_sgpr_count = 7
// Make sure a blank line won't break anything:
// Make sure a line with whitespace won't break anything:
workitem_vgpr_count = 16
.end_amd_kernel_code_t
// ASM-LABEL: {{^}}amd_kernel_code_t_minimal:
// ASM: .amd_kernel_code_t
// ASM: amd_code_version_major = 1
// ASM: amd_code_version_minor = 2
// ASM: amd_machine_kind = 1
// ASM: amd_machine_version_major = 10
// ASM: amd_machine_version_minor = 1
// ASM: amd_machine_version_stepping = 0
// ASM: kernel_code_entry_byte_offset = 256
// ASM: kernel_code_prefetch_byte_size = 0
// ASM: granulated_workitem_vgpr_count = 1
// ASM: granulated_wavefront_sgpr_count = 1
// ASM: priority = 0
// ASM: float_mode = 0
// ASM: priv = 0
// ASM: enable_dx10_clamp = 0
// ASM: debug_mode = 0
// ASM: enable_ieee_mode = 0
// ASM: enable_wgp_mode = 1
// ASM: enable_mem_ordered = 1
// ASM: enable_fwd_progress = 0
// ASM: enable_sgpr_private_segment_wave_byte_offset = 0
// ASM: user_sgpr_count = 2
// ASM: enable_sgpr_workgroup_id_x = 0
// ASM: enable_sgpr_workgroup_id_y = 0
// ASM: enable_sgpr_workgroup_id_z = 0
// ASM: enable_sgpr_workgroup_info = 0
// ASM: enable_vgpr_workitem_id = 0
// ASM: enable_exception_msb = 0
// ASM: granulated_lds_size = 0
// ASM: enable_exception = 0
// ASM: enable_sgpr_private_segment_buffer = 0
// ASM: enable_sgpr_dispatch_ptr = 0
// ASM: enable_sgpr_queue_ptr = 0
// ASM: enable_sgpr_kernarg_segment_ptr = 1
// ASM: enable_sgpr_dispatch_id = 0
// ASM: enable_sgpr_flat_scratch_init = 0
// ASM: enable_sgpr_private_segment_size = 0
// ASM: enable_sgpr_grid_workgroup_count_x = 0
// ASM: enable_sgpr_grid_workgroup_count_y = 0
// ASM: enable_sgpr_grid_workgroup_count_z = 0
// ASM: enable_wavefront_size32 = 0
// ASM: enable_ordered_append_gds = 0
// ASM: private_element_size = 0
// ASM: is_ptr64 = 1
// ASM: is_dynamic_callstack = 0
// ASM: is_debug_enabled = 0
// ASM: is_xnack_enabled = 0
// ASM: workitem_private_segment_byte_size = 0
// ASM: workgroup_group_segment_byte_size = 0
// ASM: gds_segment_byte_size = 0
// ASM: kernarg_segment_byte_size = 16
// ASM: workgroup_fbarrier_count = 0
// ASM: wavefront_sgpr_count = 8
// ASM: workitem_vgpr_count = 16
// ASM: reserved_vgpr_first = 0
// ASM: reserved_vgpr_count = 0
// ASM: reserved_sgpr_first = 0
// ASM: reserved_sgpr_count = 0
// ASM: debug_wavefront_private_segment_offset_sgpr = 0
// ASM: debug_private_segment_buffer_sgpr = 0
// ASM: kernarg_segment_alignment = 4
// ASM: group_segment_alignment = 4
// ASM: private_segment_alignment = 4
// ASM: wavefront_size = 6
// ASM: call_convention = -1
// ASM: runtime_loader_kernel_symbol = 0
// ASM: .end_amd_kernel_code_t

View File

@ -1,70 +0,0 @@
// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=2 -show-encoding %s | FileCheck --check-prefix=CHECK %s
// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx800 --amdhsa-code-object-version=2 -show-encoding %s | FileCheck --check-prefix=CHECK %s
// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=2 -show-encoding %s | FileCheck --check-prefix=CHECK %s
// The legacy ValueType field should be parsed without error, but not
// re-emitted.
// CHECK: .amd_amdgpu_hsa_metadata
// CHECK: Version: [ 1, 0 ]
// CHECK: Printf:
// CHECK: - '1:1:4:%d\n'
// CHECK: - '2:1:8:%g\n'
// CHECK: Kernels:
// CHECK: - Name: test_kernel
// CHECK: SymbolName: 'test_kernel@kd'
// CHECK: Language: OpenCL C
// CHECK: LanguageVersion: [ 2, 0 ]
// CHECK: Args:
// CHECK: - TypeName: char
// CHECK: Size: 1
// CHECK: Align: 1
// CHECK: ValueKind: ByValue
// CHECK: AccQual: Default
// CHECK: - Size: 8
// CHECK: Align: 8
// CHECK: ValueKind: HiddenGlobalOffsetX
// CHECK: - Size: 8
// CHECK: Align: 8
// CHECK: ValueKind: HiddenGlobalOffsetY
// CHECK: - Size: 8
// CHECK: Align: 8
// CHECK: ValueKind: HiddenGlobalOffsetZ
// CHECK: - Size: 8
// CHECK: Align: 8
// CHECK: ValueKind: HiddenPrintfBuffer
// CHECK: AddrSpaceQual: Global
// CHECK: .end_amd_amdgpu_hsa_metadata
.amd_amdgpu_hsa_metadata
Version: [ 1, 0 ]
Printf: [ '1:1:4:%d\n', '2:1:8:%g\n' ]
Kernels:
- Name: test_kernel
SymbolName: test_kernel@kd
Language: OpenCL C
LanguageVersion: [ 2, 0 ]
Args:
- TypeName: char
Size: 1
Align: 1
ValueKind: ByValue
ValueType: I8
AccQual: Default
- Size: 8
Align: 8
ValueKind: HiddenGlobalOffsetX
ValueType: I64
- Size: 8
Align: 8
ValueKind: HiddenGlobalOffsetY
ValueType: I64
- Size: 8
Align: 8
ValueKind: HiddenGlobalOffsetZ
ValueType: I64
- Size: 8
Align: 8
ValueKind: HiddenPrintfBuffer
ValueType: I8
AddrSpaceQual: Global
.end_amd_amdgpu_hsa_metadata

View File

@ -1,32 +0,0 @@
// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=2 -show-encoding %s | FileCheck --check-prefix=CHECK %s
// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx800 --amdhsa-code-object-version=2 -show-encoding %s | FileCheck --check-prefix=CHECK %s
// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=2 -show-encoding %s | FileCheck --check-prefix=CHECK %s
// CHECK: .amd_amdgpu_hsa_metadata
// CHECK: Version: [ 1, 0 ]
// CHECK: Printf:
// CHECK: - '1:1:4:%d\n'
// CHECK: - '2:1:8:%g\n'
// CHECK: Kernels:
// CHECK: - Name: test_kernel
// CHECK: SymbolName: 'test_kernel@kd'
// CHECK: Language: OpenCL C
// CHECK: LanguageVersion: [ 2, 0 ]
// CHECK: Attrs:
// CHECK: ReqdWorkGroupSize: [ 1, 2, 4 ]
// CHECK: WorkGroupSizeHint: [ 8, 16, 32 ]
// CHECK: VecTypeHint: int
// CHECK: .end_amd_amdgpu_hsa_metadata
.amd_amdgpu_hsa_metadata
Version: [ 1, 0 ]
Printf: [ '1:1:4:%d\n', '2:1:8:%g\n' ]
Kernels:
- Name: test_kernel
SymbolName: test_kernel@kd
Language: OpenCL C
LanguageVersion: [ 2, 0 ]
Attrs:
ReqdWorkGroupSize: [ 1, 2, 4 ]
WorkGroupSizeHint: [ 8, 16, 32 ]
VecTypeHint: int
.end_amd_amdgpu_hsa_metadata

View File

@ -1,34 +0,0 @@
// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=2 -show-encoding %s | FileCheck --check-prefix=CHECK %s
// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx800 --amdhsa-code-object-version=2 -show-encoding %s | FileCheck --check-prefix=CHECK %s
// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=2 -show-encoding %s | FileCheck --check-prefix=CHECK %s
// CHECK: .amd_amdgpu_hsa_metadata
// CHECK: Version: [ 1, 0 ]
// CHECK: Kernels:
// CHECK: - Name: test_kernel
// CHECK: SymbolName: 'test_kernel@kd'
// CHECK: CodeProps:
// CHECK: KernargSegmentSize: 24
// CHECK: GroupSegmentFixedSize: 24
// CHECK: PrivateSegmentFixedSize: 16
// CHECK: KernargSegmentAlign: 16
// CHECK: WavefrontSize: 64
// CHECK: MaxFlatWorkGroupSize: 256
// CHECK: NumSpilledSGPRs: 1
// CHECK: NumSpilledVGPRs: 1
.amd_amdgpu_hsa_metadata
Version: [ 1, 0 ]
Printf: [ '1:1:4:%d\n', '2:1:8:%g\n' ]
Kernels:
- Name: test_kernel
SymbolName: test_kernel@kd
CodeProps:
KernargSegmentSize: 24
GroupSegmentFixedSize: 24
PrivateSegmentFixedSize: 16
KernargSegmentAlign: 16
WavefrontSize: 64
MaxFlatWorkGroupSize: 256
NumSpilledSGPRs: 1
NumSpilledVGPRs: 1
.end_amd_amdgpu_hsa_metadata

View File

@ -1,28 +0,0 @@
// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=2 -show-encoding %s | FileCheck --check-prefix=CHECK %s
// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx800 --amdhsa-code-object-version=2 -show-encoding %s | FileCheck --check-prefix=CHECK %s
// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=2 -show-encoding %s | FileCheck --check-prefix=CHECK %s
// CHECK: .amd_amdgpu_hsa_metadata
// CHECK: Version: [ 1, 0 ]
// CHECK: Kernels:
// CHECK: - Name: test_kernel
// CHECK: SymbolName: 'test_kernel@kd'
// CHECK: DebugProps:
// CHECK: DebuggerABIVersion: [ 1, 0 ]
// CHECK: ReservedNumVGPRs: 4
// CHECK: ReservedFirstVGPR: 11
// CHECK: PrivateSegmentBufferSGPR: 0
// CHECK: WavefrontPrivateSegmentOffsetSGPR: 11
.amd_amdgpu_hsa_metadata
Version: [ 1, 0 ]
Printf: [ '1:1:4:%d\n', '2:1:8:%g\n' ]
Kernels:
- Name: test_kernel
SymbolName: test_kernel@kd
DebugProps:
DebuggerABIVersion: [ 1, 0 ]
ReservedNumVGPRs: 4
ReservedFirstVGPR: 11
PrivateSegmentBufferSGPR: 0
WavefrontPrivateSegmentOffsetSGPR: 11
.end_amd_amdgpu_hsa_metadata

View File

@ -1,42 +0,0 @@
// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=2 %s 2>&1 | FileCheck %s
// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx800 --amdhsa-code-object-version=2 %s 2>&1 | FileCheck %s
// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=2 %s 2>&1 | FileCheck %s
// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=2 -filetype=obj %s 2>&1 | FileCheck %s
// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx800 --amdhsa-code-object-version=2 -filetype=obj %s 2>&1 | FileCheck %s
// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=2 -filetype=obj %s 2>&1 | FileCheck %s
// CHECK: error: unknown key 'UnknownKey'
.amd_amdgpu_hsa_metadata
UnknownKey: [ 2, 0 ]
Version: [ 1, 0 ]
Printf: [ '1:1:4:%d\n', '2:1:8:%g\n' ]
Kernels:
- Name: test_kernel
SymbolName: test_kernel@kd
Language: OpenCL C
LanguageVersion: [ 2, 0 ]
Args:
- Size: 1
Align: 1
ValueKind: ByValue
ValueType: I8
AccQual: Default
TypeName: char
- Size: 8
Align: 8
ValueKind: HiddenGlobalOffsetX
ValueType: I64
- Size: 8
Align: 8
ValueKind: HiddenGlobalOffsetY
ValueType: I64
- Size: 8
Align: 8
ValueKind: HiddenGlobalOffsetZ
ValueType: I64
- Size: 8
Align: 8
ValueKind: HiddenPrintfBuffer
ValueType: I8
AddrSpaceQual: Global
.end_amd_amdgpu_hsa_metadata

View File

@ -1,28 +0,0 @@
// RUN: llvm-mc -triple amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 -show-encoding %s | FileCheck %s --check-prefix=ASM
// RUN: llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 -show-encoding %s | llvm-readobj -S --sd - | FileCheck %s --check-prefix=ELF
// For compatibility reasons we convert .text sections to .hsatext
// ELF: Section {
// ELF: Name: .text
// ELF: Type: SHT_PROGBITS (0x1)
// ELF: Flags [ (0x6)
// ELF: SHF_ALLOC (0x2)
// ELF: SHF_EXECINSTR (0x4)
// ELF: Size: 260
// ELF: }
.text
// ASM: .text
.hsa_code_object_version 1,0
// ASM: .hsa_code_object_version 1,0
.hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
// ASM: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
.amd_kernel_code_t
.end_amd_kernel_code_t
s_endpgm

View File

@ -1,65 +0,0 @@
// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=2 %s | FileCheck --check-prefixes=GCN,GFX7 %s
// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx1010 --amdhsa-code-object-version=2 -mattr=+wavefrontsize32,-wavefrontsize64 %s | FileCheck --check-prefixes=GCN,GFX10-W32 %s
// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx1010 --amdhsa-code-object-version=2 -mattr=-wavefrontsize32,+wavefrontsize64 %s | FileCheck --check-prefixes=GCN,GFX10-W64 %s
// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=2 %s 2>&1 | FileCheck --check-prefix=GFX7-ERR %s
// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx1010 --amdhsa-code-object-version=2 -mattr=+wavefrontsize32,-wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=GFX10-W32-ERR %s
// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx1010 --amdhsa-code-object-version=2 -mattr=-wavefrontsize32,+wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=GFX10-W64-ERR %s
// GCN: test0:
// GFX7: enable_wavefront_size32 = 0
// GFX7: wavefront_size = 6
// GFX10-W32: enable_wavefront_size32 = 1
// GFX10-W32: wavefront_size = 5
// GFX10-W64: enable_wavefront_size32 = 0
// GFX10-W64: wavefront_size = 6
.amdgpu_hsa_kernel test0
test0:
.amd_kernel_code_t
.end_amd_kernel_code_t
// GCN: test1:
// GFX7: enable_wavefront_size32 = 0
// GFX7: wavefront_size = 6
// GFX10-W32-ERR: error: enable_wavefront_size32=0 requires +WavefrontSize64
// GFX10-W64: enable_wavefront_size32 = 0
// GFX10-W64: wavefront_size = 6
.amdgpu_hsa_kernel test1
test1:
.amd_kernel_code_t
enable_wavefront_size32 = 0
.end_amd_kernel_code_t
// GCN: test2:
// GFX7: enable_wavefront_size32 = 0
// GFX7: wavefront_size = 6
// GFX10-W32-ERR: error: wavefront_size=6 requires +WavefrontSize64
// GFX10-W64: enable_wavefront_size32 = 0
// GFX10-W64: wavefront_size = 6
.amdgpu_hsa_kernel test2
test2:
.amd_kernel_code_t
wavefront_size = 6
.end_amd_kernel_code_t
// GCN: test3:
// GFX7-ERR: error: enable_wavefront_size32=1 is only allowed on GFX10+
// GFX10-W32: enable_wavefront_size32 = 1
// GFX10-W32: wavefront_size = 5
// GFX10-W64-ERR: error: enable_wavefront_size32=1 requires +WavefrontSize32
.amdgpu_hsa_kernel test3
test3:
.amd_kernel_code_t
enable_wavefront_size32 = 1
.end_amd_kernel_code_t
// GCN: test4:
// GFX7-ERR: error: wavefront_size=5 is only allowed on GFX10+
// GFX10-W32: enable_wavefront_size32 = 1
// GFX10-W32: wavefront_size = 5
// GFX10-W64-ERR: error: wavefront_size=5 requires +WavefrontSize32
.amdgpu_hsa_kernel test4
test4:
.amd_kernel_code_t
wavefront_size = 5
.end_amd_kernel_code_t

View File

@ -1,274 +0,0 @@
// RUN: llvm-mc -triple amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 -show-encoding %s | FileCheck %s --check-prefix=ASM
// RUN: llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 -show-encoding %s | llvm-readobj --symbols -S --sd - | FileCheck %s --check-prefix=ELF
// ELF: Section {
// ELF: Name: .text
// ELF: Type: SHT_PROGBITS (0x1)
// ELF: Flags [ (0x6)
// ELF: SHF_ALLOC (0x2)
// ELF: SHF_EXECINSTR (0x4)
// ELF: SHT_NOTE
// ELF: 0000: 04000000 08000000 01000000 414D4400
// ELF: 0010: 02000000 00000000 04000000 1B000000
// ELF: 0020: 03000000 414D4400 04000700 07000000
// ELF: 0030: 00000000 00000000 414D4400 414D4447
// ELF: 0040: 50550000
// We can't check binary representation of metadata note: it is different on
// Windows and Linux because of carriage return on Windows
// ELF: Symbol {
// ELF: Name: amd_kernel_code_t_test_all
// ELF: Type: AMDGPU_HSA_KERNEL (0xA)
// ELF: Section: .text
// ELF: }
// ELF: Symbol {
// ELF: Name: amd_kernel_code_t_minimal
// ELF: Type: AMDGPU_HSA_KERNEL (0xA)
// ELF: Section: .text
// ELF: }
// File-level directives: select the .text section, declare code object
// version 2,0 and ISA 7,0,0, and embed an HSA metadata block naming the two
// kernels defined further down. Each directive is followed by the check line
// that expects llvm-mc to print it back.
.text
// ASM: .text
.hsa_code_object_version 2,0
// ASM: .hsa_code_object_version 2,0
.hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
// ASM: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
.amd_amdgpu_hsa_metadata
Version: [ 3, 0 ]
Kernels:
- Name: amd_kernel_code_t_test_all
SymbolName: amd_kernel_code_t_test_all@kd
- Name: amd_kernel_code_t_minimal
SymbolName: amd_kernel_code_t_minimal@kd
.end_amd_amdgpu_hsa_metadata
// The printed metadata is expected to carry the same fields, with each
// SymbolName value wrapped in single quotes.
// ASM: .amd_amdgpu_hsa_metadata
// ASM: Version: [ 3, 0 ]
// ASM: Kernels:
// ASM: - Name: amd_kernel_code_t_test_all
// ASM: SymbolName: 'amd_kernel_code_t_test_all@kd'
// ASM: - Name: amd_kernel_code_t_minimal
// ASM: SymbolName: 'amd_kernel_code_t_minimal@kd'
// ASM: .end_amd_amdgpu_hsa_metadata
// Declare both kernel symbols; the object-file checks at the top of the file
// expect each of them to have symbol type AMDGPU_HSA_KERNEL in .text.
.amdgpu_hsa_kernel amd_kernel_code_t_test_all
.amdgpu_hsa_kernel amd_kernel_code_t_minimal
// Kernel amd_kernel_code_t_test_all: gives an explicit value to each
// amd_kernel_code_t key listed in the block below; the check lines after the
// block expect those values to be printed back.
// Several keys are checked under a different printed spelling, e.g.
// kernel_code_version_major/minor as amd_code_version_major/minor and
// machine_kind as amd_machine_kind.
// NOTE(review): max_scratch_backing_memory_byte_size is assigned below, but no
// check line for this kernel mentions it -- confirm whether that is intended.
amd_kernel_code_t_test_all:
; Test all amd_kernel_code_t members with non-default values.
.amd_kernel_code_t
kernel_code_version_major = 100
kernel_code_version_minor = 100
machine_kind = 0
machine_version_major = 5
machine_version_minor = 5
machine_version_stepping = 5
kernel_code_entry_byte_offset = 512
kernel_code_prefetch_byte_size = 1
max_scratch_backing_memory_byte_size = 1
compute_pgm_rsrc1_vgprs = 1
compute_pgm_rsrc1_sgprs = 1
compute_pgm_rsrc1_priority = 1
compute_pgm_rsrc1_float_mode = 1
compute_pgm_rsrc1_priv = 1
compute_pgm_rsrc1_dx10_clamp = 1
compute_pgm_rsrc1_debug_mode = 1
compute_pgm_rsrc1_ieee_mode = 1
compute_pgm_rsrc2_scratch_en = 1
compute_pgm_rsrc2_user_sgpr = 1
compute_pgm_rsrc2_tgid_x_en = 1
compute_pgm_rsrc2_tgid_y_en = 1
compute_pgm_rsrc2_tgid_z_en = 1
compute_pgm_rsrc2_tg_size_en = 1
compute_pgm_rsrc2_tidig_comp_cnt = 1
compute_pgm_rsrc2_excp_en_msb = 1
compute_pgm_rsrc2_lds_size = 1
compute_pgm_rsrc2_excp_en = 1
enable_sgpr_private_segment_buffer = 1
enable_sgpr_dispatch_ptr = 1
enable_sgpr_queue_ptr = 1
enable_sgpr_kernarg_segment_ptr = 1
enable_sgpr_dispatch_id = 1
enable_sgpr_flat_scratch_init = 1
enable_sgpr_private_segment_size = 1
enable_sgpr_grid_workgroup_count_x = 1
enable_sgpr_grid_workgroup_count_y = 1
enable_sgpr_grid_workgroup_count_z = 1
enable_ordered_append_gds = 1
private_element_size = 1
is_ptr64 = 1
is_dynamic_callstack = 1
is_debug_enabled = 1
is_xnack_enabled = 1
workitem_private_segment_byte_size = 1
workgroup_group_segment_byte_size = 1
gds_segment_byte_size = 1
kernarg_segment_byte_size = 1
workgroup_fbarrier_count = 1
wavefront_sgpr_count = 1
workitem_vgpr_count = 1
reserved_vgpr_first = 1
reserved_vgpr_count = 1
reserved_sgpr_first = 1
reserved_sgpr_count = 1
debug_wavefront_private_segment_offset_sgpr = 1
debug_private_segment_buffer_sgpr = 1
kernarg_segment_alignment = 5
group_segment_alignment = 5
private_segment_alignment = 5
wavefront_size = 6
call_convention = 1
runtime_loader_kernel_symbol = 1
.end_amd_kernel_code_t
// ASM-LABEL: {{^}}amd_kernel_code_t_test_all:
// ASM: .amd_kernel_code_t
// ASM: amd_code_version_major = 100
// ASM: amd_code_version_minor = 100
// ASM: amd_machine_kind = 0
// ASM: amd_machine_version_major = 5
// ASM: amd_machine_version_minor = 5
// ASM: amd_machine_version_stepping = 5
// ASM: kernel_code_entry_byte_offset = 512
// ASM: kernel_code_prefetch_byte_size = 1
// ASM: granulated_workitem_vgpr_count = 1
// ASM: granulated_wavefront_sgpr_count = 1
// ASM: priority = 1
// ASM: float_mode = 1
// ASM: priv = 1
// ASM: enable_dx10_clamp = 1
// ASM: debug_mode = 1
// ASM: enable_ieee_mode = 1
// ASM: enable_sgpr_private_segment_wave_byte_offset = 1
// ASM: user_sgpr_count = 1
// ASM: enable_sgpr_workgroup_id_x = 1
// ASM: enable_sgpr_workgroup_id_y = 1
// ASM: enable_sgpr_workgroup_id_z = 1
// ASM: enable_sgpr_workgroup_info = 1
// ASM: enable_vgpr_workitem_id = 1
// ASM: enable_exception_msb = 1
// ASM: granulated_lds_size = 1
// ASM: enable_exception = 1
// ASM: enable_sgpr_private_segment_buffer = 1
// ASM: enable_sgpr_dispatch_ptr = 1
// ASM: enable_sgpr_queue_ptr = 1
// ASM: enable_sgpr_kernarg_segment_ptr = 1
// ASM: enable_sgpr_dispatch_id = 1
// ASM: enable_sgpr_flat_scratch_init = 1
// ASM: enable_sgpr_private_segment_size = 1
// ASM: enable_sgpr_grid_workgroup_count_x = 1
// ASM: enable_sgpr_grid_workgroup_count_y = 1
// ASM: enable_sgpr_grid_workgroup_count_z = 1
// ASM: enable_ordered_append_gds = 1
// ASM: private_element_size = 1
// ASM: is_ptr64 = 1
// ASM: is_dynamic_callstack = 1
// ASM: is_debug_enabled = 1
// ASM: is_xnack_enabled = 1
// ASM: workitem_private_segment_byte_size = 1
// ASM: workgroup_group_segment_byte_size = 1
// ASM: gds_segment_byte_size = 1
// ASM: kernarg_segment_byte_size = 1
// ASM: workgroup_fbarrier_count = 1
// ASM: wavefront_sgpr_count = 1
// ASM: workitem_vgpr_count = 1
// ASM: reserved_vgpr_first = 1
// ASM: reserved_vgpr_count = 1
// ASM: reserved_sgpr_first = 1
// ASM: reserved_sgpr_count = 1
// ASM: debug_wavefront_private_segment_offset_sgpr = 1
// ASM: debug_private_segment_buffer_sgpr = 1
// ASM: kernarg_segment_alignment = 5
// ASM: group_segment_alignment = 5
// ASM: private_segment_alignment = 5
// ASM: wavefront_size = 6
// ASM: call_convention = 1
// ASM: runtime_loader_kernel_symbol = 1
// ASM: .end_amd_kernel_code_t
// Kernel amd_kernel_code_t_minimal: assigns only a handful of keys; the check
// lines after the block expect every remaining key to be printed with a value
// the input never sets (for example amd_machine_version_major = 7 and
// wavefront_size = 6).
amd_kernel_code_t_minimal:
.amd_kernel_code_t
enable_sgpr_kernarg_segment_ptr = 1
is_ptr64 = 1
granulated_workitem_vgpr_count = 1
granulated_wavefront_sgpr_count = 1
user_sgpr_count = 2
kernarg_segment_byte_size = 16
wavefront_sgpr_count = 8
// The next two lines are commented out, once in each comment style; the checks
// still expect wavefront_sgpr_count = 8, so neither may take effect.
// wavefront_sgpr_count = 7
; wavefront_sgpr_count = 7
// NOTE(review): the two comments below announce a blank line and a
// whitespace-only line, but neither is present in this excerpt -- confirm
// against the checked-in file.
// Make sure a blank line won't break anything:
// Make sure a line with whitespace won't break anything:
workitem_vgpr_count = 16
.end_amd_kernel_code_t
// ASM-LABEL: {{^}}amd_kernel_code_t_minimal:
// ASM: .amd_kernel_code_t
// ASM: amd_code_version_major = 1
// ASM: amd_code_version_minor = 2
// ASM: amd_machine_kind = 1
// ASM: amd_machine_version_major = 7
// ASM: amd_machine_version_minor = 0
// ASM: amd_machine_version_stepping = 0
// ASM: kernel_code_entry_byte_offset = 256
// ASM: kernel_code_prefetch_byte_size = 0
// ASM: granulated_workitem_vgpr_count = 1
// ASM: granulated_wavefront_sgpr_count = 1
// ASM: priority = 0
// ASM: float_mode = 0
// ASM: priv = 0
// ASM: enable_dx10_clamp = 0
// ASM: debug_mode = 0
// ASM: enable_ieee_mode = 0
// ASM: enable_sgpr_private_segment_wave_byte_offset = 0
// ASM: user_sgpr_count = 2
// ASM: enable_sgpr_workgroup_id_x = 0
// ASM: enable_sgpr_workgroup_id_y = 0
// ASM: enable_sgpr_workgroup_id_z = 0
// ASM: enable_sgpr_workgroup_info = 0
// ASM: enable_vgpr_workitem_id = 0
// ASM: enable_exception_msb = 0
// ASM: granulated_lds_size = 0
// ASM: enable_exception = 0
// ASM: enable_sgpr_private_segment_buffer = 0
// ASM: enable_sgpr_dispatch_ptr = 0
// ASM: enable_sgpr_queue_ptr = 0
// ASM: enable_sgpr_kernarg_segment_ptr = 1
// ASM: enable_sgpr_dispatch_id = 0
// ASM: enable_sgpr_flat_scratch_init = 0
// ASM: enable_sgpr_private_segment_size = 0
// ASM: enable_sgpr_grid_workgroup_count_x = 0
// ASM: enable_sgpr_grid_workgroup_count_y = 0
// ASM: enable_sgpr_grid_workgroup_count_z = 0
// ASM: enable_ordered_append_gds = 0
// ASM: private_element_size = 0
// ASM: is_ptr64 = 1
// ASM: is_dynamic_callstack = 0
// ASM: is_debug_enabled = 0
// ASM: is_xnack_enabled = 0
// ASM: workitem_private_segment_byte_size = 0
// ASM: workgroup_group_segment_byte_size = 0
// ASM: gds_segment_byte_size = 0
// ASM: kernarg_segment_byte_size = 16
// ASM: workgroup_fbarrier_count = 0
// ASM: wavefront_sgpr_count = 8
// ASM: workitem_vgpr_count = 16
// ASM: reserved_vgpr_first = 0
// ASM: reserved_vgpr_count = 0
// ASM: reserved_sgpr_first = 0
// ASM: reserved_sgpr_count = 0
// ASM: debug_wavefront_private_segment_offset_sgpr = 0
// ASM: debug_private_segment_buffer_sgpr = 0
// ASM: kernarg_segment_alignment = 4
// ASM: group_segment_alignment = 4
// ASM: private_segment_alignment = 4
// ASM: wavefront_size = 6
// ASM: call_convention = -1
// ASM: runtime_loader_kernel_symbol = 0
// ASM: .end_amd_kernel_code_t

View File

@ -1,31 +0,0 @@
// RUN: llvm-mc -triple amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 -show-encoding %s | FileCheck %s --check-prefix=ASM --check-prefix=ASM_700
// RUN: llvm-mc -triple amdgcn--amdhsa -mcpu=gfx803 --amdhsa-code-object-version=2 -show-encoding %s | FileCheck %s --check-prefix=ASM --check-prefix=ASM_803
// RUN: llvm-mc -triple amdgcn--amdhsa -mcpu=stoney --amdhsa-code-object-version=2 -show-encoding %s | FileCheck %s --check-prefix=ASM --check-prefix=ASM_810
// RUN: llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 -show-encoding %s | llvm-readobj -S --sd - | FileCheck %s --check-prefix=ELF --check-prefix=ELF_700
// RUN: llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=gfx803 --amdhsa-code-object-version=2 -show-encoding %s | llvm-readobj -S --sd - | FileCheck %s --check-prefix=ELF --check-prefix=ELF_803
// RUN: llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=stoney --amdhsa-code-object-version=2 -show-encoding %s | llvm-readobj -S --sd - | FileCheck %s --check-prefix=ELF --check-prefix=ELF_810
// ELF: SHT_NOTE
// ELF: 0000: 04000000 08000000 01000000 414D4400
// ELF: 0010: 01000000 00000000 04000000 1B000000
// ELF_700: 0020: 03000000 414D4400 04000700 07000000
// ELF_700: 0030: 00000000 00000000 414D4400 414D4447
// ELF_803: 0020: 03000000 414D4400 04000700 08000000
// ELF_803: 0030: 00000000 03000000 414D4400 414D4447
// ELF_810: 0020: 03000000 414D4400 04000700 08000000
// ELF_810: 0030: 01000000 00000000 414D4400 414D4447
// ELF: 0040: 50550000
// Declares code object version 1,0, then exercises .hsa_code_object_isa in two
// forms: without operands and with expression operands.
.hsa_code_object_version 1,0
// ASM: .hsa_code_object_version 1,0
// Test defaults
// With no operands, the directive is expected to be printed with the ISA
// version of the CPU chosen on the command line (see the -mcpu options at the
// top of the file): 7,0,0 for kaveri, 8,0,3 for gfx803 and 8,1,0 for stoney.
.hsa_code_object_isa
// ASM_700: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
// ASM_803: .hsa_code_object_isa 8,0,3,"AMD","AMDGPU"
// ASM_810: .hsa_code_object_isa 8,1,0,"AMD","AMDGPU"
// Test expressions and symbols
// With A = 2 the three numeric operands evaluate to 3, 4 and 5.
.set A,2
.hsa_code_object_isa A+1,A*2,A/A+4,"AMD","AMDGPU"
// ASM: .hsa_code_object_isa 3,4,5,"AMD","AMDGPU"

View File

@ -1,13 +1,13 @@
// Checks that a target-id directive naming amdgcn-amd-amdhsa--gfx802 is
// accepted only when the command line selects the amdhsa OS with -mcpu=gfx802
// or -mcpu=iceland (both are expected to print gfx802). The unknown OS, amdpal
// and amdhsa with gfx803 are all expected to fail with a mismatch error.
// NOTE(review): this excerpt carries both the code-object-version 2 and 4
// variants of the amdhsa command lines, and both the .amd_amdgpu_isa and
// .amdgcn_target forms of the directive and its checks -- presumably the two
// sides of a diff; confirm which set belongs in the checked-in file.
// RUN: not llvm-mc -triple amdgcn-amd-unknown -mcpu=gfx802 %s 2>&1 | FileCheck --check-prefix=OSABI-UNK-ERR %s
// RUN: not llvm-mc -triple amdgcn-amd-unknown -mcpu=iceland %s 2>&1 | FileCheck --check-prefix=OSABI-UNK-ERR %s
// RUN: llvm-mc -triple amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -mcpu=gfx802 %s | FileCheck --check-prefix=OSABI-HSA %s
// RUN: llvm-mc -triple amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -mcpu=iceland %s | FileCheck --check-prefix=OSABI-HSA %s
// RUN: not llvm-mc -triple amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -mcpu=gfx803 %s 2>&1 | FileCheck --check-prefix=OSABI-HSA-ERR %s
// RUN: llvm-mc -triple amdgcn-amd-amdhsa --amdhsa-code-object-version=4 -mcpu=gfx802 %s | FileCheck --check-prefix=OSABI-HSA %s
// RUN: llvm-mc -triple amdgcn-amd-amdhsa --amdhsa-code-object-version=4 -mcpu=iceland %s | FileCheck --check-prefix=OSABI-HSA %s
// RUN: not llvm-mc -triple amdgcn-amd-amdhsa --amdhsa-code-object-version=4 -mcpu=gfx803 %s 2>&1 | FileCheck --check-prefix=OSABI-HSA-ERR %s
// RUN: not llvm-mc -triple amdgcn-amd-amdpal -mcpu=gfx802 %s 2>&1 | FileCheck --check-prefix=OSABI-PAL-ERR %s
// RUN: not llvm-mc -triple amdgcn-amd-amdpal -mcpu=iceland %s 2>&1 | FileCheck --check-prefix=OSABI-PAL-ERR %s
// OSABI-HSA: .amd_amdgpu_isa "amdgcn-amd-amdhsa--gfx802"
// OSABI-UNK-ERR: error: target id must match options
// OSABI-HSA-ERR: error: target id must match options
// OSABI-PAL-ERR: error: target id must match options
.amd_amdgpu_isa "amdgcn-amd-amdhsa--gfx802"
// OSABI-HSA: .amdgcn_target "amdgcn-amd-amdhsa--gfx802"
// OSABI-UNK-ERR: error: .amdgcn_target directive's target id amdgcn-amd-amdhsa--gfx802 does not match the specified target id amdgcn-amd-unknown--gfx802
// OSABI-HSA-ERR: error: .amdgcn_target directive's target id amdgcn-amd-amdhsa--gfx802 does not match the specified target id amdgcn-amd-amdhsa--gfx803
// OSABI-PAL-ERR: error: .amdgcn_target directive's target id amdgcn-amd-amdhsa--gfx802 does not match the specified target id amdgcn-amd-amdpal--gfx802
.amdgcn_target "amdgcn-amd-amdhsa--gfx802"

View File

@ -1,13 +1,14 @@
// Checks that a target-id directive naming amdgcn-amd-amdpal--gfx802 is
// accepted only when the command line selects the amdpal OS with -mcpu=gfx802
// or -mcpu=iceland; the unknown OS and amdhsa are expected to fail with a
// mismatch error.
// NOTE(review): this excerpt carries both the code-object-version 2 and 4
// variants of the amdhsa command lines, and both the .amd_amdgpu_isa and
// .amdgcn_target forms of the directive -- presumably the two sides of a diff;
// confirm which set belongs in the checked-in file.
// RUN: not llvm-mc -triple amdgcn-amd-unknown -mcpu=gfx802 %s 2>&1 | FileCheck --check-prefix=OSABI-UNK-ERR %s
// RUN: not llvm-mc -triple amdgcn-amd-unknown -mcpu=iceland %s 2>&1 | FileCheck --check-prefix=OSABI-UNK-ERR %s
// RUN: not llvm-mc -triple amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -mcpu=gfx802 %s 2>&1 | FileCheck --check-prefix=OSABI-HSA-ERR %s
// RUN: not llvm-mc -triple amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -mcpu=iceland %s 2>&1 | FileCheck --check-prefix=OSABI-HSA-ERR %s
// RUN: not llvm-mc -triple amdgcn-amd-amdhsa --amdhsa-code-object-version=4 -mcpu=gfx802 %s 2>&1 | FileCheck --check-prefix=OSABI-HSA-ERR %s
// RUN: not llvm-mc -triple amdgcn-amd-amdhsa --amdhsa-code-object-version=4 -mcpu=iceland %s 2>&1 | FileCheck --check-prefix=OSABI-HSA-ERR %s
// RUN: llvm-mc -triple amdgcn-amd-amdpal -mcpu=gfx802 %s | FileCheck --check-prefix=OSABI-PAL %s
// RUN: llvm-mc -triple amdgcn-amd-amdpal -mcpu=iceland %s | FileCheck --check-prefix=OSABI-PAL %s
// RUN: not llvm-mc -triple amdgcn-amd-unknown -mcpu=gfx802 %s 2>&1 | FileCheck --check-prefix=OSABI-UNK-ERR %s
// NOTE(review): the command line directly above repeats the first one in this
// file verbatim, and no command line here uses the OSABI-PAL-ERR prefix that is
// checked below -- verify whether an amdpal mismatch case (e.g. a different
// -mcpu) was intended instead.
// OSABI-PAL: .amd_amdgpu_isa "amdgcn-amd-amdpal--gfx802"
// OSABI-UNK-ERR: error: target id must match options
// OSABI-HSA-ERR: error: target id must match options
// OSABI-HSA-ERR: error: .amdgcn_target directive's target id amdgcn-amd-amdpal--gfx802 does not match the specified target id amdgcn-amd-amdhsa--gfx802
// OSABI-PAL-ERR: error: target id must match options
.amd_amdgpu_isa "amdgcn-amd-amdpal--gfx802"
.amdgcn_target "amdgcn-amd-amdpal--gfx802"

View File

@ -1,13 +1,14 @@
// Checks that a target-id directive naming amdgcn-amd-unknown--gfx802 is
// accepted only when the command line selects the unknown OS with -mcpu=gfx802
// or -mcpu=iceland; the unknown OS with gfx803, amdhsa and amdpal are expected
// to fail with a mismatch error.
// NOTE(review): this excerpt carries both the code-object-version 2 and 4
// variants of the amdhsa command lines, and both the .amd_amdgpu_isa and
// .amdgcn_target forms of the directive -- presumably the two sides of a diff;
// confirm which set belongs in the checked-in file.
// RUN: llvm-mc -triple amdgcn-amd-unknown -mcpu=gfx802 %s | FileCheck --check-prefix=OSABI-UNK %s
// RUN: llvm-mc -triple amdgcn-amd-unknown -mcpu=iceland %s | FileCheck --check-prefix=OSABI-UNK %s
// RUN: not llvm-mc -triple amdgcn-amd-unknown -mcpu=gfx803 %s 2>&1 | FileCheck --check-prefix=OSABI-UNK-ERR %s
// RUN: not llvm-mc -triple amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -mcpu=gfx802 %s 2>&1 | FileCheck --check-prefix=OSABI-HSA-ERR %s
// RUN: not llvm-mc -triple amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -mcpu=iceland %s 2>&1 | FileCheck --check-prefix=OSABI-HSA-ERR %s
// RUN: not llvm-mc -triple amdgcn-amd-amdhsa --amdhsa-code-object-version=4 -mcpu=gfx802 %s 2>&1 | FileCheck --check-prefix=OSABI-HSA-ERR %s
// RUN: not llvm-mc -triple amdgcn-amd-amdhsa --amdhsa-code-object-version=4 -mcpu=iceland %s 2>&1 | FileCheck --check-prefix=OSABI-HSA-ERR %s
// RUN: not llvm-mc -triple amdgcn-amd-amdpal -mcpu=gfx802 %s 2>&1 | FileCheck --check-prefix=OSABI-PAL-ERR %s
// RUN: not llvm-mc -triple amdgcn-amd-amdpal -mcpu=iceland %s 2>&1 | FileCheck --check-prefix=OSABI-PAL-ERR %s
// OSABI-UNK: .amd_amdgpu_isa "amdgcn-amd-unknown--gfx802"
// OSABI-UNK-ERR: error: target id must match options
// OSABI-HSA-ERR: error: target id must match options
// OSABI-HSA-ERR: error: .amdgcn_target directive's target id amdgcn-amd-unknown--gfx802 does not match the specified target id amdgcn-amd-amdhsa--gfx802
// OSABI-PAL-ERR: error: target id must match options
.amd_amdgpu_isa "amdgcn-amd-unknown--gfx802"
.amdgcn_target "amdgcn-amd-unknown--gfx802"