[AMDGPU] gfx1250 64-bit relocations and fixups (#148951)
This commit is contained in:
parent
c7d1eae4fc
commit
2d6534b7da
@ -5734,6 +5734,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||
NODE_NAME_CASE(BUILD_VERTICAL_VECTOR)
|
||||
NODE_NAME_CASE(CONST_DATA_PTR)
|
||||
NODE_NAME_CASE(PC_ADD_REL_OFFSET)
|
||||
NODE_NAME_CASE(PC_ADD_REL_OFFSET64)
|
||||
NODE_NAME_CASE(LDS)
|
||||
NODE_NAME_CASE(DUMMY_CHAIN)
|
||||
NODE_NAME_CASE(LOAD_D16_HI)
|
||||
|
@ -545,6 +545,7 @@ enum NodeType : unsigned {
|
||||
/// Pointer to the start of the shader's constant data.
|
||||
CONST_DATA_PTR,
|
||||
PC_ADD_REL_OFFSET,
|
||||
PC_ADD_REL_OFFSET64,
|
||||
LDS,
|
||||
|
||||
DUMMY_CHAIN,
|
||||
|
@ -2932,14 +2932,22 @@ bool AMDGPULegalizerInfo::buildPCRelGlobalAddress(Register DstReg, LLT PtrTy,
|
||||
Register PCReg = PtrTy.getSizeInBits() != 32 ? DstReg :
|
||||
B.getMRI()->createGenericVirtualRegister(ConstPtrTy);
|
||||
|
||||
MachineInstrBuilder MIB = B.buildInstr(AMDGPU::SI_PC_ADD_REL_OFFSET)
|
||||
.addDef(PCReg);
|
||||
if (ST.has64BitLiterals()) {
|
||||
assert(GAFlags != SIInstrInfo::MO_NONE);
|
||||
|
||||
MIB.addGlobalAddress(GV, Offset, GAFlags);
|
||||
if (GAFlags == SIInstrInfo::MO_NONE)
|
||||
MIB.addImm(0);
|
||||
else
|
||||
MIB.addGlobalAddress(GV, Offset, GAFlags + 1);
|
||||
MachineInstrBuilder MIB =
|
||||
B.buildInstr(AMDGPU::SI_PC_ADD_REL_OFFSET64).addDef(PCReg);
|
||||
MIB.addGlobalAddress(GV, Offset, GAFlags + 2);
|
||||
} else {
|
||||
MachineInstrBuilder MIB =
|
||||
B.buildInstr(AMDGPU::SI_PC_ADD_REL_OFFSET).addDef(PCReg);
|
||||
|
||||
MIB.addGlobalAddress(GV, Offset, GAFlags);
|
||||
if (GAFlags == SIInstrInfo::MO_NONE)
|
||||
MIB.addImm(0);
|
||||
else
|
||||
MIB.addGlobalAddress(GV, Offset, GAFlags + 1);
|
||||
}
|
||||
|
||||
if (!B.getMRI()->getRegClassOrNull(PCReg))
|
||||
B.getMRI()->setRegClass(PCReg, &AMDGPU::SReg_64RegClass);
|
||||
@ -2955,6 +2963,15 @@ void AMDGPULegalizerInfo::buildAbsGlobalAddress(
|
||||
MachineRegisterInfo &MRI) const {
|
||||
bool RequiresHighHalf = PtrTy.getSizeInBits() != 32;
|
||||
|
||||
if (RequiresHighHalf && ST.has64BitLiterals()) {
|
||||
if (!MRI.getRegClassOrNull(DstReg))
|
||||
MRI.setRegClass(DstReg, &AMDGPU::SReg_64RegClass);
|
||||
B.buildInstr(AMDGPU::S_MOV_B64)
|
||||
.addDef(DstReg)
|
||||
.addGlobalAddress(GV, 0, SIInstrInfo::MO_ABS64);
|
||||
return;
|
||||
}
|
||||
|
||||
LLT S32 = LLT::scalar(32);
|
||||
|
||||
// Use the destination directly, if and only if we store the lower address
|
||||
|
@ -50,6 +50,7 @@ static AMDGPUMCExpr::Specifier getSpecifier(unsigned MOFlags) {
|
||||
default:
|
||||
return AMDGPUMCExpr::S_None;
|
||||
case SIInstrInfo::MO_GOTPCREL:
|
||||
case SIInstrInfo::MO_GOTPCREL64:
|
||||
return AMDGPUMCExpr::S_GOTPCREL;
|
||||
case SIInstrInfo::MO_GOTPCREL32_LO:
|
||||
return AMDGPUMCExpr::S_GOTPCREL32_LO;
|
||||
@ -59,10 +60,14 @@ static AMDGPUMCExpr::Specifier getSpecifier(unsigned MOFlags) {
|
||||
return AMDGPUMCExpr::S_REL32_LO;
|
||||
case SIInstrInfo::MO_REL32_HI:
|
||||
return AMDGPUMCExpr::S_REL32_HI;
|
||||
case SIInstrInfo::MO_REL64:
|
||||
return AMDGPUMCExpr::S_REL64;
|
||||
case SIInstrInfo::MO_ABS32_LO:
|
||||
return AMDGPUMCExpr::S_ABS32_LO;
|
||||
case SIInstrInfo::MO_ABS32_HI:
|
||||
return AMDGPUMCExpr::S_ABS32_HI;
|
||||
case SIInstrInfo::MO_ABS64:
|
||||
return AMDGPUMCExpr::S_ABS64;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -64,6 +64,8 @@ unsigned AMDGPUELFObjectWriter::getRelocType(const MCFixup &Fixup,
|
||||
return ELF::R_AMDGPU_ABS32_LO;
|
||||
case AMDGPUMCExpr::S_ABS32_HI:
|
||||
return ELF::R_AMDGPU_ABS32_HI;
|
||||
case AMDGPUMCExpr::S_ABS64:
|
||||
return ELF::R_AMDGPU_ABS64;
|
||||
}
|
||||
|
||||
MCFixupKind Kind = Fixup.getKind();
|
||||
|
@ -25,6 +25,7 @@ const MCAsmInfo::AtSpecifier atSpecifiers[] = {
|
||||
{AMDGPUMCExpr::S_REL64, "rel64"},
|
||||
{AMDGPUMCExpr::S_ABS32_LO, "abs32@lo"},
|
||||
{AMDGPUMCExpr::S_ABS32_HI, "abs32@hi"},
|
||||
{AMDGPUMCExpr::S_ABS64, "abs64"},
|
||||
};
|
||||
|
||||
AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Triple &TT,
|
||||
|
@ -564,7 +564,8 @@ static bool needsPCRel(const MCExpr *Expr) {
|
||||
case MCExpr::SymbolRef: {
|
||||
auto *SE = cast<MCSymbolRefExpr>(Expr);
|
||||
auto Spec = AMDGPU::getSpecifier(SE);
|
||||
return Spec != AMDGPUMCExpr::S_ABS32_LO && Spec != AMDGPUMCExpr::S_ABS32_HI;
|
||||
return Spec != AMDGPUMCExpr::S_ABS32_LO &&
|
||||
Spec != AMDGPUMCExpr::S_ABS32_HI && Spec != AMDGPUMCExpr::S_ABS64;
|
||||
}
|
||||
case MCExpr::Binary: {
|
||||
auto *BE = cast<MCBinaryExpr>(Expr);
|
||||
@ -687,7 +688,12 @@ void AMDGPUMCCodeEmitter::getMachineOpValueCommon(
|
||||
const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
|
||||
uint32_t Offset = Desc.getSize();
|
||||
assert(Offset == 4 || Offset == 8);
|
||||
addFixup(Fixups, Offset, MO.getExpr(), FK_Data_4, PCRel);
|
||||
auto OpType = Desc.operands()[OpNo].OperandType;
|
||||
MCFixupKind Kind = (STI.hasFeature(AMDGPU::Feature64BitLiterals) &&
|
||||
OpType == AMDGPU::OPERAND_REG_IMM_INT64)
|
||||
? FK_Data_8
|
||||
: FK_Data_4;
|
||||
addFixup(Fixups, Offset, MO.getExpr(), Kind, PCRel);
|
||||
}
|
||||
|
||||
const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
|
||||
|
@ -50,6 +50,7 @@ public:
|
||||
S_REL64, // symbol@rel64
|
||||
S_ABS32_LO, // symbol@abs32@lo
|
||||
S_ABS32_HI, // symbol@abs32@hi
|
||||
S_ABS64, // symbol@abs64
|
||||
};
|
||||
|
||||
private:
|
||||
|
@ -8166,6 +8166,14 @@ buildPCRelGlobalAddress(SelectionDAG &DAG, const GlobalValue *GV,
|
||||
// $symbol@*@hi with lower 32 bits and higher 32 bits of a literal constant,
|
||||
// which is a 64-bit pc-relative offset from the encoding of the $symbol
|
||||
// operand to the global variable.
|
||||
if (((const GCNSubtarget &)DAG.getSubtarget()).has64BitLiterals()) {
|
||||
assert(GAFlags != SIInstrInfo::MO_NONE);
|
||||
|
||||
SDValue Ptr =
|
||||
DAG.getTargetGlobalAddress(GV, DL, MVT::i64, Offset, GAFlags + 2);
|
||||
return DAG.getNode(AMDGPUISD::PC_ADD_REL_OFFSET64, DL, PtrVT, Ptr);
|
||||
}
|
||||
|
||||
SDValue PtrLo = DAG.getTargetGlobalAddress(GV, DL, MVT::i32, Offset, GAFlags);
|
||||
SDValue PtrHi;
|
||||
if (GAFlags == SIInstrInfo::MO_NONE)
|
||||
@ -8215,6 +8223,13 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
|
||||
}
|
||||
|
||||
if (Subtarget->isAmdPalOS() || Subtarget->isMesa3DOS()) {
|
||||
if (Subtarget->has64BitLiterals()) {
|
||||
SDValue Addr = DAG.getTargetGlobalAddress(
|
||||
GV, DL, MVT::i64, GSD->getOffset(), SIInstrInfo::MO_ABS64);
|
||||
return SDValue(DAG.getMachineNode(AMDGPU::S_MOV_B64, DL, MVT::i64, Addr),
|
||||
0);
|
||||
}
|
||||
|
||||
SDValue AddrLo = DAG.getTargetGlobalAddress(
|
||||
GV, DL, MVT::i32, GSD->getOffset(), SIInstrInfo::MO_ABS32_LO);
|
||||
AddrLo = {DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, AddrLo), 0};
|
||||
|
@ -2498,6 +2498,25 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
|
||||
MI.eraseFromParent();
|
||||
break;
|
||||
}
|
||||
// Expand the SI_PC_ADD_REL_OFFSET64 pseudo into S_GETPC_B64 followed by an
// S_ADD_U64 that adds the pseudo's address operand, both defining the
// pseudo's destination register, and then erase the pseudo itself.
case AMDGPU::SI_PC_ADD_REL_OFFSET64: {
|
||||
MachineFunction &MF = *MBB.getParent();
|
||||
Register Reg = MI.getOperand(0).getReg();
|
||||
// Work on a local copy of the address operand; the offset adjustment below
// is applied to this copy.
MachineOperand Op = MI.getOperand(1);
|
||||
|
||||
// Create a bundle so these instructions won't be re-ordered by the
|
||||
// post-RA scheduler.
|
||||
MIBundleBuilder Bundler(MBB, MI);
|
||||
Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_GETPC_B64), Reg));
|
||||
// Only global-address operands get their offset bumped.
// NOTE(review): the +4 presumably accounts for the literal being encoded
// 4 bytes past the PC value produced by S_GETPC_B64 - confirm.
if (Op.isGlobal())
|
||||
Op.setOffset(Op.getOffset() + 4);
|
||||
Bundler.append(
|
||||
BuildMI(MF, DL, get(AMDGPU::S_ADD_U64), Reg).addReg(Reg).add(Op));
|
||||
|
||||
finalizeBundle(MBB, Bundler.begin());
|
||||
|
||||
MI.eraseFromParent();
|
||||
break;
|
||||
}
|
||||
case AMDGPU::ENTER_STRICT_WWM: {
|
||||
// This only gets its own opcode so that SIPreAllocateWWMRegs can tell when
|
||||
// Whole Wave Mode is entered.
|
||||
@ -9315,13 +9334,16 @@ SIInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
|
||||
ArrayRef<std::pair<unsigned, const char *>>
|
||||
SIInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
|
||||
static const std::pair<unsigned, const char *> TargetFlags[] = {
|
||||
{ MO_GOTPCREL, "amdgpu-gotprel" },
|
||||
{ MO_GOTPCREL32_LO, "amdgpu-gotprel32-lo" },
|
||||
{ MO_GOTPCREL32_HI, "amdgpu-gotprel32-hi" },
|
||||
{ MO_REL32_LO, "amdgpu-rel32-lo" },
|
||||
{ MO_REL32_HI, "amdgpu-rel32-hi" },
|
||||
{ MO_ABS32_LO, "amdgpu-abs32-lo" },
|
||||
{ MO_ABS32_HI, "amdgpu-abs32-hi" },
|
||||
{MO_GOTPCREL, "amdgpu-gotprel"},
|
||||
{MO_GOTPCREL32_LO, "amdgpu-gotprel32-lo"},
|
||||
{MO_GOTPCREL32_HI, "amdgpu-gotprel32-hi"},
|
||||
{MO_GOTPCREL64, "amdgpu-gotprel64"},
|
||||
{MO_REL32_LO, "amdgpu-rel32-lo"},
|
||||
{MO_REL32_HI, "amdgpu-rel32-hi"},
|
||||
{MO_REL64, "amdgpu-rel64"},
|
||||
{MO_ABS32_LO, "amdgpu-abs32-lo"},
|
||||
{MO_ABS32_HI, "amdgpu-abs32-hi"},
|
||||
{MO_ABS64, "amdgpu-abs64"},
|
||||
};
|
||||
|
||||
return ArrayRef(TargetFlags);
|
||||
|
@ -214,16 +214,20 @@ public:
|
||||
MO_GOTPCREL32_LO = 2,
|
||||
// MO_GOTPCREL32_HI -> symbol@gotpcrel32@hi -> R_AMDGPU_GOTPCREL32_HI.
|
||||
MO_GOTPCREL32_HI = 3,
|
||||
// MO_GOTPCREL64 -> symbol@GOTPCREL -> R_AMDGPU_GOTPCREL.
|
||||
MO_GOTPCREL64 = 4,
|
||||
// MO_REL32_LO -> symbol@rel32@lo -> R_AMDGPU_REL32_LO.
|
||||
MO_REL32 = 4,
|
||||
MO_REL32_LO = 4,
|
||||
MO_REL32 = 5,
|
||||
MO_REL32_LO = 5,
|
||||
// MO_REL32_HI -> symbol@rel32@hi -> R_AMDGPU_REL32_HI.
|
||||
MO_REL32_HI = 5,
|
||||
MO_REL32_HI = 6,
|
||||
MO_REL64 = 7,
|
||||
|
||||
MO_FAR_BRANCH_OFFSET = 6,
|
||||
MO_FAR_BRANCH_OFFSET = 8,
|
||||
|
||||
MO_ABS32_LO = 8,
|
||||
MO_ABS32_HI = 9,
|
||||
MO_ABS32_LO = 9,
|
||||
MO_ABS32_HI = 10,
|
||||
MO_ABS64 = 11,
|
||||
};
|
||||
|
||||
explicit SIInstrInfo(const GCNSubtarget &ST);
|
||||
|
@ -268,6 +268,10 @@ def SIpc_add_rel_offset : SDNode<"AMDGPUISD::PC_ADD_REL_OFFSET",
|
||||
SDTypeProfile<1, 2, [SDTCisVT<0, iPTR>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>
|
||||
>;
|
||||
|
||||
// SelectionDAG node for AMDGPUISD::PC_ADD_REL_OFFSET64. Type profile: one
// result and one operand; the result is pointer-typed (iPTR) and the operand
// has the same type as the result.
def SIpc_add_rel_offset64 : SDNode<"AMDGPUISD::PC_ADD_REL_OFFSET64",
|
||||
SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisSameAs<0,1>]>
|
||||
>;
|
||||
|
||||
def SIlds : SDNode<"AMDGPUISD::LDS",
|
||||
SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisSameAs<0,1>]>
|
||||
>;
|
||||
|
@ -1144,6 +1144,14 @@ def : GCNPat <
|
||||
(SI_PC_ADD_REL_OFFSET $ptr_lo, (i32 0))
|
||||
>;
|
||||
|
||||
// Pseudo instruction (SPseudoInstSI) with an SReg_64 destination and a single
// si_ga address operand. Its selection pattern matches an i64
// SIpc_add_rel_offset64 applied to a target global address. Guarded by the
// Has64BitLiterals subtarget predicate.
def SI_PC_ADD_REL_OFFSET64 : SPseudoInstSI <
|
||||
(outs SReg_64:$dst),
|
||||
(ins si_ga:$ptr),
|
||||
[(set SReg_64:$dst,
|
||||
(i64 (SIpc_add_rel_offset64 tglobaladdr:$ptr)))]> {
|
||||
let SubtargetPredicate = Has64BitLiterals;
|
||||
}
|
||||
|
||||
def : GCNPat<
|
||||
(AMDGPUtrap timm:$trapid),
|
||||
(S_TRAP $trapid)
|
||||
|
138
llvm/test/CodeGen/AMDGPU/global-address.ll
Normal file
138
llvm/test/CodeGen/AMDGPU/global-address.ll
Normal file
@ -0,0 +1,138 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
|
||||
; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 --verify-machineinstrs < %s | FileCheck -check-prefix=GFX11-PAL-SDAG %s
|
||||
; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 --verify-machineinstrs < %s | FileCheck -check-prefix=GFX11-PAL-GISEL %s
|
||||
; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1250 < %s | FileCheck -check-prefix=GFX1250-PAL %s
|
||||
; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1250 < %s | FileCheck -check-prefix=GFX1250-PAL %s
|
||||
; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 --verify-machineinstrs < %s | FileCheck -check-prefix=GFX11-HSA %s
|
||||
; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 --verify-machineinstrs < %s | FileCheck -check-prefix=GFX11-HSA %s
|
||||
; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 < %s | FileCheck -check-prefix=GFX1250-HSA %s
|
||||
; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 < %s | FileCheck -check-prefix=GFX1250-HSA %s
|
||||
|
||||
define amdgpu_kernel void @caller_internal() {
|
||||
; GFX11-PAL-SDAG-LABEL: caller_internal:
|
||||
; GFX11-PAL-SDAG: ; %bb.0:
|
||||
; GFX11-PAL-SDAG-NEXT: s_mov_b32 s1, internal_func@abs32@hi
|
||||
; GFX11-PAL-SDAG-NEXT: s_mov_b32 s0, internal_func@abs32@lo
|
||||
; GFX11-PAL-SDAG-NEXT: s_mov_b32 s32, 0
|
||||
; GFX11-PAL-SDAG-NEXT: s_swappc_b64 s[30:31], s[0:1]
|
||||
; GFX11-PAL-SDAG-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-PAL-GISEL-LABEL: caller_internal:
|
||||
; GFX11-PAL-GISEL: ; %bb.0:
|
||||
; GFX11-PAL-GISEL-NEXT: s_mov_b32 s0, internal_func@abs32@lo
|
||||
; GFX11-PAL-GISEL-NEXT: s_mov_b32 s1, internal_func@abs32@hi
|
||||
; GFX11-PAL-GISEL-NEXT: s_mov_b32 s32, 0
|
||||
; GFX11-PAL-GISEL-NEXT: s_swappc_b64 s[30:31], s[0:1]
|
||||
; GFX11-PAL-GISEL-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-PAL-LABEL: caller_internal:
|
||||
; GFX1250-PAL: ; %bb.0:
|
||||
; GFX1250-PAL-NEXT: s_mov_b64 s[0:1], internal_func@abs64
|
||||
; GFX1250-PAL-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1250-PAL-NEXT: s_swap_pc_i64 s[30:31], s[0:1]
|
||||
; GFX1250-PAL-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-HSA-LABEL: caller_internal:
|
||||
; GFX11-HSA: ; %bb.0:
|
||||
; GFX11-HSA-NEXT: s_getpc_b64 s[0:1]
|
||||
; GFX11-HSA-NEXT: s_add_u32 s0, s0, internal_func@gotpcrel32@lo+4
|
||||
; GFX11-HSA-NEXT: s_addc_u32 s1, s1, internal_func@gotpcrel32@hi+12
|
||||
; GFX11-HSA-NEXT: s_mov_b32 s32, 0
|
||||
; GFX11-HSA-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
|
||||
; GFX11-HSA-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-HSA-NEXT: s_swappc_b64 s[30:31], s[0:1]
|
||||
; GFX11-HSA-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-HSA-LABEL: caller_internal:
|
||||
; GFX1250-HSA: ; %bb.0:
|
||||
; GFX1250-HSA-NEXT: s_get_pc_i64 s[0:1]
|
||||
; GFX1250-HSA-NEXT: s_add_nc_u64 s[0:1], s[0:1], internal_func@gotpcrel+4
|
||||
; GFX1250-HSA-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1250-HSA-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
|
||||
; GFX1250-HSA-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-HSA-NEXT: s_swap_pc_i64 s[30:31], s[0:1]
|
||||
; GFX1250-HSA-NEXT: s_endpgm
|
||||
call amdgpu_gfx void @internal_func()
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @caller_exterinal() {
|
||||
; GFX11-PAL-SDAG-LABEL: caller_exterinal:
|
||||
; GFX11-PAL-SDAG: ; %bb.0:
|
||||
; GFX11-PAL-SDAG-NEXT: v_mov_b32_e32 v31, v0
|
||||
; GFX11-PAL-SDAG-NEXT: s_mov_b32 s12, s13
|
||||
; GFX11-PAL-SDAG-NEXT: s_mov_b64 s[10:11], s[6:7]
|
||||
; GFX11-PAL-SDAG-NEXT: s_mov_b64 s[8:9], s[4:5]
|
||||
; GFX11-PAL-SDAG-NEXT: s_mov_b32 s17, external_func@abs32@hi
|
||||
; GFX11-PAL-SDAG-NEXT: s_mov_b32 s16, external_func@abs32@lo
|
||||
; GFX11-PAL-SDAG-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX11-PAL-SDAG-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX11-PAL-SDAG-NEXT: s_mov_b32 s13, s14
|
||||
; GFX11-PAL-SDAG-NEXT: s_mov_b32 s14, s15
|
||||
; GFX11-PAL-SDAG-NEXT: s_mov_b32 s32, 0
|
||||
; GFX11-PAL-SDAG-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX11-PAL-SDAG-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-PAL-GISEL-LABEL: caller_exterinal:
|
||||
; GFX11-PAL-GISEL: ; %bb.0:
|
||||
; GFX11-PAL-GISEL-NEXT: v_mov_b32_e32 v31, v0
|
||||
; GFX11-PAL-GISEL-NEXT: s_mov_b32 s12, s13
|
||||
; GFX11-PAL-GISEL-NEXT: s_mov_b64 s[10:11], s[6:7]
|
||||
; GFX11-PAL-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5]
|
||||
; GFX11-PAL-GISEL-NEXT: s_mov_b32 s16, external_func@abs32@lo
|
||||
; GFX11-PAL-GISEL-NEXT: s_mov_b32 s17, external_func@abs32@hi
|
||||
; GFX11-PAL-GISEL-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX11-PAL-GISEL-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX11-PAL-GISEL-NEXT: s_mov_b32 s13, s14
|
||||
; GFX11-PAL-GISEL-NEXT: s_mov_b32 s14, s15
|
||||
; GFX11-PAL-GISEL-NEXT: s_mov_b32 s32, 0
|
||||
; GFX11-PAL-GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX11-PAL-GISEL-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-PAL-LABEL: caller_exterinal:
|
||||
; GFX1250-PAL: ; %bb.0:
|
||||
; GFX1250-PAL-NEXT: v_mov_b32_e32 v31, v0
|
||||
; GFX1250-PAL-NEXT: s_mov_b64 s[10:11], s[6:7]
|
||||
; GFX1250-PAL-NEXT: s_mov_b64 s[8:9], s[4:5]
|
||||
; GFX1250-PAL-NEXT: s_mov_b64 s[12:13], external_func@abs64
|
||||
; GFX1250-PAL-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX1250-PAL-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX1250-PAL-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1250-PAL-NEXT: s_swap_pc_i64 s[30:31], s[12:13]
|
||||
; GFX1250-PAL-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-HSA-LABEL: caller_exterinal:
|
||||
; GFX11-HSA: ; %bb.0:
|
||||
; GFX11-HSA-NEXT: v_mov_b32_e32 v31, v0
|
||||
; GFX11-HSA-NEXT: s_mov_b32 s12, s13
|
||||
; GFX11-HSA-NEXT: s_mov_b64 s[10:11], s[6:7]
|
||||
; GFX11-HSA-NEXT: s_mov_b64 s[8:9], s[4:5]
|
||||
; GFX11-HSA-NEXT: s_getpc_b64 s[16:17]
|
||||
; GFX11-HSA-NEXT: s_add_u32 s16, s16, external_func@rel32@lo+4
|
||||
; GFX11-HSA-NEXT: s_addc_u32 s17, s17, external_func@rel32@hi+12
|
||||
; GFX11-HSA-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX11-HSA-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX11-HSA-NEXT: s_mov_b32 s13, s14
|
||||
; GFX11-HSA-NEXT: s_mov_b32 s14, s15
|
||||
; GFX11-HSA-NEXT: s_mov_b32 s32, 0
|
||||
; GFX11-HSA-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
||||
; GFX11-HSA-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-HSA-LABEL: caller_exterinal:
|
||||
; GFX1250-HSA: ; %bb.0:
|
||||
; GFX1250-HSA-NEXT: v_mov_b32_e32 v31, v0
|
||||
; GFX1250-HSA-NEXT: s_mov_b64 s[10:11], s[6:7]
|
||||
; GFX1250-HSA-NEXT: s_mov_b64 s[8:9], s[4:5]
|
||||
; GFX1250-HSA-NEXT: s_get_pc_i64 s[12:13]
|
||||
; GFX1250-HSA-NEXT: s_add_nc_u64 s[12:13], s[12:13], external_func@rel64+4
|
||||
; GFX1250-HSA-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX1250-HSA-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; GFX1250-HSA-NEXT: s_mov_b32 s32, 0
|
||||
; GFX1250-HSA-NEXT: s_swap_pc_i64 s[30:31], s[12:13]
|
||||
; GFX1250-HSA-NEXT: s_endpgm
|
||||
call void @external_func()
|
||||
ret void
|
||||
}
|
||||
|
||||
declare amdgpu_gfx void @internal_func()
|
||||
declare hidden void @external_func()
|
@ -22,13 +22,23 @@ body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0_sgpr1
|
||||
; CHECK-LABEL: name: flags
|
||||
; CHECK: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @foo + 4, target-flags(amdgpu-rel32-hi) @foo + 12, implicit-def dead $scc
|
||||
; CHECK: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 target-flags(amdgpu-gotprel) @foo
|
||||
; CHECK: S_ENDPGM 0
|
||||
; CHECK: liveins: $sgpr0_sgpr1
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @foo + 4, target-flags(amdgpu-rel32-hi) @foo + 12, implicit-def dead $scc
|
||||
; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 target-flags(amdgpu-gotprel) @foo
|
||||
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @foo
|
||||
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @foo
|
||||
; CHECK-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 target-flags(amdgpu-abs64) @foo
|
||||
; CHECK-NEXT: [[S_MOV_B64_2:%[0-9]+]]:sreg_64 = S_MOV_B64 target-flags(amdgpu-rel64) @foo
|
||||
; CHECK-NEXT: [[S_MOV_B64_3:%[0-9]+]]:sreg_64 = S_MOV_B64 target-flags(amdgpu-gotprel64) @foo
|
||||
; CHECK-NEXT: S_ENDPGM 0
|
||||
%0 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @foo + 4, target-flags(amdgpu-rel32-hi) @foo + 12, implicit-def dead $scc
|
||||
%1 = S_MOV_B64 target-flags(amdgpu-gotprel) @foo
|
||||
%2:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @foo
|
||||
%3:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @foo
|
||||
%4:sreg_64 = S_MOV_B64 target-flags(amdgpu-abs64) @foo
|
||||
%5:sreg_64 = S_MOV_B64 target-flags(amdgpu-rel64) @foo
|
||||
%6:sreg_64 = S_MOV_B64 target-flags(amdgpu-gotprel64) @foo
|
||||
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
27
llvm/test/MC/AMDGPU/fixup64.s
Normal file
27
llvm/test/MC/AMDGPU/fixup64.s
Normal file
@ -0,0 +1,27 @@
|
||||
// RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck --check-prefix=SI %s
|
||||
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s | FileCheck --check-prefix=GFX1250 %s
|
||||
// RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck --check-prefix=SI-ERR --implicit-check-not=error: --strict-whitespace %s
|
||||
|
||||
.LL1:
|
||||
.LL2:
|
||||
s_mov_b64 vcc, .LL2-.LL1
|
||||
// GFX1250: s_mov_b64 vcc, .LL2-.LL1 ; encoding: [0xfe,0x01,0xea,0xbe,A,A,A,A,A,A,A,A]
|
||||
// GFX1250-NEXT: ; fixup A - offset: 4, value: .LL2-.LL1, kind: FK_Data_8
|
||||
// SI-ERR: :[[@LINE-3]]:{{[0-9]+}}: error: invalid operand for instruction
|
||||
|
||||
s_mov_b32 s0, .LL2-.LL1
|
||||
// SI: s_mov_b32 s0, .LL2-.LL1 ; encoding: [0xff,0x03,0x80,0xbe,A,A,A,A]
|
||||
// SI-NEXT: ; fixup A - offset: 4, value: .LL2-.LL1, kind: FK_Data_4
|
||||
|
||||
// GFX1250: s_mov_b32 s0, .LL2-.LL1 ; encoding: [0xff,0x00,0x80,0xbe,A,A,A,A]
|
||||
// GFX1250-NEXT: ; fixup A - offset: 4, value: .LL2-.LL1, kind: FK_Data_4
|
||||
|
||||
s_mov_b64 s[0:1], sym@abs64
|
||||
// GFX1250: s_mov_b64 s[0:1], sym@abs64 ; encoding: [0xfe,0x01,0x80,0xbe,A,A,A,A,A,A,A,A]
|
||||
// GFX1250-NEXT: ; fixup A - offset: 4, value: sym@abs64, kind: FK_Data_8
|
||||
// SI-ERR: :[[@LINE-3]]:{{[0-9]+}}: error: invalid operand for instruction
|
||||
|
||||
s_mov_b64 s[0:1], callee@rel64
|
||||
// GFX1250: s_mov_b64 s[0:1], callee@rel64 ; encoding: [0xfe,0x01,0x80,0xbe,A,A,A,A,A,A,A,A]
|
||||
// GFX1250-NEXT: ; fixup A - offset: 4, value: callee@rel64, kind: FK_PCRel_8
|
||||
// SI-ERR: :[[@LINE-3]]:{{[0-9]+}}: error: invalid operand for instruction
|
Loading…
x
Reference in New Issue
Block a user