diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 3414fe758eff..280f87b82b7f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -5734,6 +5734,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(BUILD_VERTICAL_VECTOR) NODE_NAME_CASE(CONST_DATA_PTR) NODE_NAME_CASE(PC_ADD_REL_OFFSET) + NODE_NAME_CASE(PC_ADD_REL_OFFSET64) NODE_NAME_CASE(LDS) NODE_NAME_CASE(DUMMY_CHAIN) NODE_NAME_CASE(LOAD_D16_HI) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h index 0dd2183b72b2..4e8c6c7ea3b2 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -545,6 +545,7 @@ enum NodeType : unsigned { /// Pointer to the start of the shader's constant data. CONST_DATA_PTR, PC_ADD_REL_OFFSET, + PC_ADD_REL_OFFSET64, LDS, DUMMY_CHAIN, diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index eb0d8b9d5b95..e7bf88d2ee5b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -2932,14 +2932,22 @@ bool AMDGPULegalizerInfo::buildPCRelGlobalAddress(Register DstReg, LLT PtrTy, Register PCReg = PtrTy.getSizeInBits() != 32 ? DstReg : B.getMRI()->createGenericVirtualRegister(ConstPtrTy); - MachineInstrBuilder MIB = B.buildInstr(AMDGPU::SI_PC_ADD_REL_OFFSET) - .addDef(PCReg); + if (ST.has64BitLiterals()) { + assert(GAFlags != SIInstrInfo::MO_NONE); - MIB.addGlobalAddress(GV, Offset, GAFlags); - if (GAFlags == SIInstrInfo::MO_NONE) - MIB.addImm(0); - else - MIB.addGlobalAddress(GV, Offset, GAFlags + 1); + MachineInstrBuilder MIB = + B.buildInstr(AMDGPU::SI_PC_ADD_REL_OFFSET64).addDef(PCReg); + MIB.addGlobalAddress(GV, Offset, GAFlags + 2); + } else { + MachineInstrBuilder MIB = + B.buildInstr(AMDGPU::SI_PC_ADD_REL_OFFSET).addDef(PCReg); + + MIB.addGlobalAddress(GV, Offset, GAFlags); + if (GAFlags == SIInstrInfo::MO_NONE) + MIB.addImm(0); + else + MIB.addGlobalAddress(GV, Offset, GAFlags + 1); + } if (!B.getMRI()->getRegClassOrNull(PCReg)) B.getMRI()->setRegClass(PCReg, &AMDGPU::SReg_64RegClass); @@ -2955,6 +2963,15 @@ void AMDGPULegalizerInfo::buildAbsGlobalAddress( MachineRegisterInfo &MRI) const { bool RequiresHighHalf = PtrTy.getSizeInBits() != 32; + if (RequiresHighHalf && ST.has64BitLiterals()) { + if (!MRI.getRegClassOrNull(DstReg)) + MRI.setRegClass(DstReg, &AMDGPU::SReg_64RegClass); + B.buildInstr(AMDGPU::S_MOV_B64) + .addDef(DstReg) + .addGlobalAddress(GV, 0, SIInstrInfo::MO_ABS64); + return; + } + LLT S32 = LLT::scalar(32); // Use the destination directly, if and only if we store the lower address diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp index 2dec16de940d..c84a0f6e3138 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp @@ -50,6 +50,7 @@ static AMDGPUMCExpr::Specifier getSpecifier(unsigned MOFlags) { default: return AMDGPUMCExpr::S_None; case SIInstrInfo::MO_GOTPCREL: + case SIInstrInfo::MO_GOTPCREL64: return AMDGPUMCExpr::S_GOTPCREL; case SIInstrInfo::MO_GOTPCREL32_LO: return AMDGPUMCExpr::S_GOTPCREL32_LO; @@ -59,10 +60,14 @@ static AMDGPUMCExpr::Specifier getSpecifier(unsigned MOFlags) { return AMDGPUMCExpr::S_REL32_LO; case SIInstrInfo::MO_REL32_HI: return AMDGPUMCExpr::S_REL32_HI; + case SIInstrInfo::MO_REL64: + return AMDGPUMCExpr::S_REL64; case SIInstrInfo::MO_ABS32_LO: return AMDGPUMCExpr::S_ABS32_LO; case SIInstrInfo::MO_ABS32_HI: return AMDGPUMCExpr::S_ABS32_HI; + case SIInstrInfo::MO_ABS64: + return AMDGPUMCExpr::S_ABS64; } } diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp index 998ea1ffe32b..0d5a8be6220d 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp @@ -64,6 +64,8 @@ unsigned AMDGPUELFObjectWriter::getRelocType(const MCFixup &Fixup, return ELF::R_AMDGPU_ABS32_LO; case AMDGPUMCExpr::S_ABS32_HI: return ELF::R_AMDGPU_ABS32_HI; + case AMDGPUMCExpr::S_ABS64: + return ELF::R_AMDGPU_ABS64; } MCFixupKind Kind = Fixup.getKind(); diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp index 31dd373e54fb..ffdac8b8ce32 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp @@ -25,6 +25,7 @@ const MCAsmInfo::AtSpecifier atSpecifiers[] = { {AMDGPUMCExpr::S_REL64, "rel64"}, {AMDGPUMCExpr::S_ABS32_LO, "abs32@lo"}, {AMDGPUMCExpr::S_ABS32_HI, "abs32@hi"}, + {AMDGPUMCExpr::S_ABS64, "abs64"}, }; AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Triple &TT, diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp index b87bb94c70dc..f48739fe0181 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp @@ -564,7 +564,8 @@ static bool needsPCRel(const MCExpr *Expr) { case MCExpr::SymbolRef: { auto *SE = cast(Expr); auto Spec = AMDGPU::getSpecifier(SE); - return Spec != AMDGPUMCExpr::S_ABS32_LO && Spec != AMDGPUMCExpr::S_ABS32_HI; + return Spec != AMDGPUMCExpr::S_ABS32_LO && + Spec != AMDGPUMCExpr::S_ABS32_HI && Spec != AMDGPUMCExpr::S_ABS64; } case MCExpr::Binary: { auto *BE = cast(Expr); @@ -687,7 +688,12 @@ void AMDGPUMCCodeEmitter::getMachineOpValueCommon( const MCInstrDesc &Desc = MCII.get(MI.getOpcode()); uint32_t Offset = Desc.getSize(); assert(Offset == 4 || Offset == 8); - addFixup(Fixups, Offset, MO.getExpr(), FK_Data_4, PCRel); + auto OpType = Desc.operands()[OpNo].OperandType; + MCFixupKind Kind = (STI.hasFeature(AMDGPU::Feature64BitLiterals) && + OpType == AMDGPU::OPERAND_REG_IMM_INT64) + ? FK_Data_8 + : FK_Data_4; + addFixup(Fixups, Offset, MO.getExpr(), Kind, PCRel); } const MCInstrDesc &Desc = MCII.get(MI.getOpcode()); diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.h index e1b9720cdbfc..bc6fdf7f2e4c 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.h @@ -50,6 +50,7 @@ public: S_REL64, // symbol@rel64 S_ABS32_LO, // symbol@abs32@lo S_ABS32_HI, // symbol@abs32@hi + S_ABS64, // symbol@abs64 }; private: diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index adc221ced2bf..e449e1ab6cb2 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -8166,6 +8166,14 @@ buildPCRelGlobalAddress(SelectionDAG &DAG, const GlobalValue *GV, // $symbol@*@hi with lower 32 bits and higher 32 bits of a literal constant, // which is a 64-bit pc-relative offset from the encoding of the $symbol // operand to the global variable. + if (((const GCNSubtarget &)DAG.getSubtarget()).has64BitLiterals()) { + assert(GAFlags != SIInstrInfo::MO_NONE); + + SDValue Ptr = + DAG.getTargetGlobalAddress(GV, DL, MVT::i64, Offset, GAFlags + 2); + return DAG.getNode(AMDGPUISD::PC_ADD_REL_OFFSET64, DL, PtrVT, Ptr); + } + SDValue PtrLo = DAG.getTargetGlobalAddress(GV, DL, MVT::i32, Offset, GAFlags); SDValue PtrHi; if (GAFlags == SIInstrInfo::MO_NONE) @@ -8215,6 +8223,13 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI, } if (Subtarget->isAmdPalOS() || Subtarget->isMesa3DOS()) { + if (Subtarget->has64BitLiterals()) { + SDValue Addr = DAG.getTargetGlobalAddress( + GV, DL, MVT::i64, GSD->getOffset(), SIInstrInfo::MO_ABS64); + return SDValue(DAG.getMachineNode(AMDGPU::S_MOV_B64, DL, MVT::i64, Addr), + 0); + } + SDValue AddrLo = DAG.getTargetGlobalAddress( GV, DL, MVT::i32, GSD->getOffset(), SIInstrInfo::MO_ABS32_LO); AddrLo = {DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, AddrLo), 0}; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 044bf5b4d7eb..2fdbcb6f233c 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -2498,6 +2498,25 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { MI.eraseFromParent(); break; } + case AMDGPU::SI_PC_ADD_REL_OFFSET64: { + MachineFunction &MF = *MBB.getParent(); + Register Reg = MI.getOperand(0).getReg(); + MachineOperand Op = MI.getOperand(1); + + // Create a bundle so these instructions won't be re-ordered by the + // post-RA scheduler. + MIBundleBuilder Bundler(MBB, MI); + Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_GETPC_B64), Reg)); + if (Op.isGlobal()) + Op.setOffset(Op.getOffset() + 4); + Bundler.append( + BuildMI(MF, DL, get(AMDGPU::S_ADD_U64), Reg).addReg(Reg).add(Op)); + + finalizeBundle(MBB, Bundler.begin()); + + MI.eraseFromParent(); + break; + } case AMDGPU::ENTER_STRICT_WWM: { // This only gets its own opcode so that SIPreAllocateWWMRegs can tell when // Whole Wave Mode is entered. @@ -9315,13 +9334,16 @@ SIInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const { ArrayRef> SIInstrInfo::getSerializableDirectMachineOperandTargetFlags() const { static const std::pair TargetFlags[] = { - { MO_GOTPCREL, "amdgpu-gotprel" }, - { MO_GOTPCREL32_LO, "amdgpu-gotprel32-lo" }, - { MO_GOTPCREL32_HI, "amdgpu-gotprel32-hi" }, - { MO_REL32_LO, "amdgpu-rel32-lo" }, - { MO_REL32_HI, "amdgpu-rel32-hi" }, - { MO_ABS32_LO, "amdgpu-abs32-lo" }, - { MO_ABS32_HI, "amdgpu-abs32-hi" }, + {MO_GOTPCREL, "amdgpu-gotprel"}, + {MO_GOTPCREL32_LO, "amdgpu-gotprel32-lo"}, + {MO_GOTPCREL32_HI, "amdgpu-gotprel32-hi"}, + {MO_GOTPCREL64, "amdgpu-gotprel64"}, + {MO_REL32_LO, "amdgpu-rel32-lo"}, + {MO_REL32_HI, "amdgpu-rel32-hi"}, + {MO_REL64, "amdgpu-rel64"}, + {MO_ABS32_LO, "amdgpu-abs32-lo"}, + {MO_ABS32_HI, "amdgpu-abs32-hi"}, + {MO_ABS64, "amdgpu-abs64"}, }; return ArrayRef(TargetFlags); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 9e84822bfc27..a38019997761 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -214,16 +214,20 @@ public: MO_GOTPCREL32_LO = 2, // MO_GOTPCREL32_HI -> symbol@gotpcrel32@hi -> R_AMDGPU_GOTPCREL32_HI. MO_GOTPCREL32_HI = 3, + // MO_GOTPCREL64 -> symbol@GOTPCREL -> R_AMDGPU_GOTPCREL. + MO_GOTPCREL64 = 4, // MO_REL32_LO -> symbol@rel32@lo -> R_AMDGPU_REL32_LO. - MO_REL32 = 4, - MO_REL32_LO = 4, + MO_REL32 = 5, + MO_REL32_LO = 5, // MO_REL32_HI -> symbol@rel32@hi -> R_AMDGPU_REL32_HI. - MO_REL32_HI = 5, + MO_REL32_HI = 6, + MO_REL64 = 7, - MO_FAR_BRANCH_OFFSET = 6, + MO_FAR_BRANCH_OFFSET = 8, - MO_ABS32_LO = 8, - MO_ABS32_HI = 9, + MO_ABS32_LO = 9, + MO_ABS32_HI = 10, + MO_ABS64 = 11, }; explicit SIInstrInfo(const GCNSubtarget &ST); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 9ea5c75606f9..ab7d34002e9f 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -268,6 +268,10 @@ def SIpc_add_rel_offset : SDNode<"AMDGPUISD::PC_ADD_REL_OFFSET", SDTypeProfile<1, 2, [SDTCisVT<0, iPTR>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]> >; +def SIpc_add_rel_offset64 : SDNode<"AMDGPUISD::PC_ADD_REL_OFFSET64", + SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisSameAs<0,1>]> +>; + def SIlds : SDNode<"AMDGPUISD::LDS", SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisSameAs<0,1>]> >; diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 4419ce00b473..d48eb52d2faa 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -1144,6 +1144,14 @@ def : GCNPat < (SI_PC_ADD_REL_OFFSET $ptr_lo, (i32 0)) >; +def SI_PC_ADD_REL_OFFSET64 : SPseudoInstSI < + (outs SReg_64:$dst), + (ins si_ga:$ptr), + [(set SReg_64:$dst, + (i64 (SIpc_add_rel_offset64 tglobaladdr:$ptr)))]> { + let SubtargetPredicate = Has64BitLiterals; +} + def : GCNPat< (AMDGPUtrap timm:$trapid), (S_TRAP $trapid) diff --git a/llvm/test/CodeGen/AMDGPU/global-address.ll b/llvm/test/CodeGen/AMDGPU/global-address.ll new file mode 100644 index 000000000000..60f4f0c762cf --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/global-address.ll @@ -0,0 +1,138 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 --verify-machineinstrs < %s | FileCheck -check-prefix=GFX11-PAL-SDAG %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 --verify-machineinstrs < %s | FileCheck -check-prefix=GFX11-PAL-GISEL %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1250 < %s | FileCheck -check-prefix=GFX1250-PAL %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1250 < %s | FileCheck -check-prefix=GFX1250-PAL %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 --verify-machineinstrs < %s | FileCheck -check-prefix=GFX11-HSA %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 --verify-machineinstrs < %s | FileCheck -check-prefix=GFX11-HSA %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 < %s | FileCheck -check-prefix=GFX1250-HSA %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 < %s | FileCheck -check-prefix=GFX1250-HSA %s + +define amdgpu_kernel void @caller_internal() { +; GFX11-PAL-SDAG-LABEL: caller_internal: +; GFX11-PAL-SDAG: ; %bb.0: +; GFX11-PAL-SDAG-NEXT: s_mov_b32 s1, internal_func@abs32@hi +; GFX11-PAL-SDAG-NEXT: s_mov_b32 s0, internal_func@abs32@lo +; GFX11-PAL-SDAG-NEXT: s_mov_b32 s32, 0 +; GFX11-PAL-SDAG-NEXT: s_swappc_b64 s[30:31], s[0:1] +; GFX11-PAL-SDAG-NEXT: s_endpgm +; +; GFX11-PAL-GISEL-LABEL: caller_internal: +; GFX11-PAL-GISEL: ; %bb.0: +; GFX11-PAL-GISEL-NEXT: s_mov_b32 s0, internal_func@abs32@lo +; GFX11-PAL-GISEL-NEXT: s_mov_b32 s1, internal_func@abs32@hi +; GFX11-PAL-GISEL-NEXT: s_mov_b32 s32, 0 +; GFX11-PAL-GISEL-NEXT: s_swappc_b64 s[30:31], s[0:1] +; GFX11-PAL-GISEL-NEXT: s_endpgm +; +; GFX1250-PAL-LABEL: caller_internal: +; GFX1250-PAL: ; %bb.0: +; GFX1250-PAL-NEXT: s_mov_b64 s[0:1], internal_func@abs64 +; GFX1250-PAL-NEXT: s_mov_b32 s32, 0 +; GFX1250-PAL-NEXT: s_swap_pc_i64 s[30:31], s[0:1] +; GFX1250-PAL-NEXT: s_endpgm +; +; GFX11-HSA-LABEL: caller_internal: +; GFX11-HSA: ; %bb.0: +; GFX11-HSA-NEXT: s_getpc_b64 s[0:1] +; GFX11-HSA-NEXT: s_add_u32 s0, s0, internal_func@gotpcrel32@lo+4 +; GFX11-HSA-NEXT: s_addc_u32 s1, s1, internal_func@gotpcrel32@hi+12 +; GFX11-HSA-NEXT: s_mov_b32 s32, 0 +; GFX11-HSA-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 +; GFX11-HSA-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-HSA-NEXT: s_swappc_b64 s[30:31], s[0:1] +; GFX11-HSA-NEXT: s_endpgm +; +; GFX1250-HSA-LABEL: caller_internal: +; GFX1250-HSA: ; %bb.0: +; GFX1250-HSA-NEXT: s_get_pc_i64 s[0:1] +; GFX1250-HSA-NEXT: s_add_nc_u64 s[0:1], s[0:1], internal_func@gotpcrel+4 +; GFX1250-HSA-NEXT: s_mov_b32 s32, 0 +; GFX1250-HSA-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 +; GFX1250-HSA-NEXT: s_wait_kmcnt 0x0 +; GFX1250-HSA-NEXT: s_swap_pc_i64 s[30:31], s[0:1] +; GFX1250-HSA-NEXT: s_endpgm + call amdgpu_gfx void @internal_func() + ret void +} + +define amdgpu_kernel void @caller_exterinal() { +; GFX11-PAL-SDAG-LABEL: caller_exterinal: +; GFX11-PAL-SDAG: ; %bb.0: +; GFX11-PAL-SDAG-NEXT: v_mov_b32_e32 v31, v0 +; GFX11-PAL-SDAG-NEXT: s_mov_b32 s12, s13 +; GFX11-PAL-SDAG-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX11-PAL-SDAG-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX11-PAL-SDAG-NEXT: s_mov_b32 s17, external_func@abs32@hi +; GFX11-PAL-SDAG-NEXT: s_mov_b32 s16, external_func@abs32@lo +; GFX11-PAL-SDAG-NEXT: s_mov_b64 s[4:5], s[0:1] +; GFX11-PAL-SDAG-NEXT: s_mov_b64 s[6:7], s[2:3] +; GFX11-PAL-SDAG-NEXT: s_mov_b32 s13, s14 +; GFX11-PAL-SDAG-NEXT: s_mov_b32 s14, s15 +; GFX11-PAL-SDAG-NEXT: s_mov_b32 s32, 0 +; GFX11-PAL-SDAG-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX11-PAL-SDAG-NEXT: s_endpgm +; +; GFX11-PAL-GISEL-LABEL: caller_exterinal: +; GFX11-PAL-GISEL: ; %bb.0: +; GFX11-PAL-GISEL-NEXT: v_mov_b32_e32 v31, v0 +; GFX11-PAL-GISEL-NEXT: s_mov_b32 s12, s13 +; GFX11-PAL-GISEL-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX11-PAL-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX11-PAL-GISEL-NEXT: s_mov_b32 s16, external_func@abs32@lo +; GFX11-PAL-GISEL-NEXT: s_mov_b32 s17, external_func@abs32@hi +; GFX11-PAL-GISEL-NEXT: s_mov_b64 s[4:5], s[0:1] +; GFX11-PAL-GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] +; GFX11-PAL-GISEL-NEXT: s_mov_b32 s13, s14 +; GFX11-PAL-GISEL-NEXT: s_mov_b32 s14, s15 +; GFX11-PAL-GISEL-NEXT: s_mov_b32 s32, 0 +; GFX11-PAL-GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX11-PAL-GISEL-NEXT: s_endpgm +; +; GFX1250-PAL-LABEL: caller_exterinal: +; GFX1250-PAL: ; %bb.0: +; GFX1250-PAL-NEXT: v_mov_b32_e32 v31, v0 +; GFX1250-PAL-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX1250-PAL-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX1250-PAL-NEXT: s_mov_b64 s[12:13], external_func@abs64 +; GFX1250-PAL-NEXT: s_mov_b64 s[4:5], s[0:1] +; GFX1250-PAL-NEXT: s_mov_b64 s[6:7], s[2:3] +; GFX1250-PAL-NEXT: s_mov_b32 s32, 0 +; GFX1250-PAL-NEXT: s_swap_pc_i64 s[30:31], s[12:13] +; GFX1250-PAL-NEXT: s_endpgm +; +; GFX11-HSA-LABEL: caller_exterinal: +; GFX11-HSA: ; %bb.0: +; GFX11-HSA-NEXT: v_mov_b32_e32 v31, v0 +; GFX11-HSA-NEXT: s_mov_b32 s12, s13 +; GFX11-HSA-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX11-HSA-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX11-HSA-NEXT: s_getpc_b64 s[16:17] +; GFX11-HSA-NEXT: s_add_u32 s16, s16, external_func@rel32@lo+4 +; GFX11-HSA-NEXT: s_addc_u32 s17, s17, external_func@rel32@hi+12 +; GFX11-HSA-NEXT: s_mov_b64 s[4:5], s[0:1] +; GFX11-HSA-NEXT: s_mov_b64 s[6:7], s[2:3] +; GFX11-HSA-NEXT: s_mov_b32 s13, s14 +; GFX11-HSA-NEXT: s_mov_b32 s14, s15 +; GFX11-HSA-NEXT: s_mov_b32 s32, 0 +; GFX11-HSA-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX11-HSA-NEXT: s_endpgm +; +; GFX1250-HSA-LABEL: caller_exterinal: +; GFX1250-HSA: ; %bb.0: +; GFX1250-HSA-NEXT: v_mov_b32_e32 v31, v0 +; GFX1250-HSA-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX1250-HSA-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX1250-HSA-NEXT: s_get_pc_i64 s[12:13] +; GFX1250-HSA-NEXT: s_add_nc_u64 s[12:13], s[12:13], external_func@rel64+4 +; GFX1250-HSA-NEXT: s_mov_b64 s[4:5], s[0:1] +; GFX1250-HSA-NEXT: s_mov_b64 s[6:7], s[2:3] +; GFX1250-HSA-NEXT: s_mov_b32 s32, 0 +; GFX1250-HSA-NEXT: s_swap_pc_i64 s[30:31], s[12:13] +; GFX1250-HSA-NEXT: s_endpgm + call void @external_func() + ret void +} + +declare amdgpu_gfx void @internal_func() +declare hidden void @external_func() diff --git a/llvm/test/CodeGen/MIR/AMDGPU/target-flags.mir b/llvm/test/CodeGen/MIR/AMDGPU/target-flags.mir index db9eed3b223c..0de6114cd7d1 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/target-flags.mir +++ b/llvm/test/CodeGen/MIR/AMDGPU/target-flags.mir @@ -22,13 +22,23 @@ body: | bb.0: liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: flags - ; CHECK: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @foo + 4, target-flags(amdgpu-rel32-hi) @foo + 12, implicit-def dead $scc - ; CHECK: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 target-flags(amdgpu-gotprel) @foo - ; CHECK: S_ENDPGM 0 + ; CHECK: liveins: $sgpr0_sgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @foo + 4, target-flags(amdgpu-rel32-hi) @foo + 12, implicit-def dead $scc + ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 target-flags(amdgpu-gotprel) @foo + ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @foo + ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @foo + ; CHECK-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 target-flags(amdgpu-abs64) @foo + ; CHECK-NEXT: [[S_MOV_B64_2:%[0-9]+]]:sreg_64 = S_MOV_B64 target-flags(amdgpu-rel64) @foo + ; CHECK-NEXT: [[S_MOV_B64_3:%[0-9]+]]:sreg_64 = S_MOV_B64 target-flags(amdgpu-gotprel64) @foo + ; CHECK-NEXT: S_ENDPGM 0 %0 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @foo + 4, target-flags(amdgpu-rel32-hi) @foo + 12, implicit-def dead $scc %1 = S_MOV_B64 target-flags(amdgpu-gotprel) @foo %2:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @foo %3:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @foo + %4:sreg_64 = S_MOV_B64 target-flags(amdgpu-abs64) @foo + %5:sreg_64 = S_MOV_B64 target-flags(amdgpu-rel64) @foo + %6:sreg_64 = S_MOV_B64 target-flags(amdgpu-gotprel64) @foo S_ENDPGM 0 ... diff --git a/llvm/test/MC/AMDGPU/fixup64.s b/llvm/test/MC/AMDGPU/fixup64.s new file mode 100644 index 000000000000..f86d8b2d69f4 --- /dev/null +++ b/llvm/test/MC/AMDGPU/fixup64.s @@ -0,0 +1,27 @@ +// RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck --check-prefix=SI %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s | FileCheck --check-prefix=GFX1250 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck --check-prefix=SI-ERR --implicit-check-not=error: --strict-whitespace %s + +.LL1: +.LL2: +s_mov_b64 vcc, .LL2-.LL1 +// GFX1250: s_mov_b64 vcc, .LL2-.LL1 ; encoding: [0xfe,0x01,0xea,0xbe,A,A,A,A,A,A,A,A] +// GFX1250-NEXT: ; fixup A - offset: 4, value: .LL2-.LL1, kind: FK_Data_8 +// SI-ERR: :[[@LINE-3]]:{{[0-9]+}}: error: invalid operand for instruction + +s_mov_b32 s0, .LL2-.LL1 +// SI: s_mov_b32 s0, .LL2-.LL1 ; encoding: [0xff,0x03,0x80,0xbe,A,A,A,A] +// SI-NEXT: ; fixup A - offset: 4, value: .LL2-.LL1, kind: FK_Data_4 + +// GFX1250: s_mov_b32 s0, .LL2-.LL1 ; encoding: [0xff,0x00,0x80,0xbe,A,A,A,A] +// GFX1250-NEXT: ; fixup A - offset: 4, value: .LL2-.LL1, kind: FK_Data_4 + +s_mov_b64 s[0:1], sym@abs64 +// GFX1250: s_mov_b64 s[0:1], sym@abs64 ; encoding: [0xfe,0x01,0x80,0xbe,A,A,A,A,A,A,A,A] +// GFX1250-NEXT: ; fixup A - offset: 4, value: sym@abs64, kind: FK_Data_8 +// SI-ERR: :[[@LINE-3]]:{{[0-9]+}}: error: invalid operand for instruction + +s_mov_b64 s[0:1], callee@rel64 +// GFX1250: s_mov_b64 s[0:1], callee@rel64 ; encoding: [0xfe,0x01,0x80,0xbe,A,A,A,A,A,A,A,A] +// GFX1250-NEXT: ; fixup A - offset: 4, value: callee@rel64, kind: FK_PCRel_8 +// SI-ERR: :[[@LINE-3]]:{{[0-9]+}}: error: invalid operand for instruction