[AMDGPU] gfx1250 64-bit relocations and fixups (#148951)

This commit is contained in:
Stanislav Mekhanoshin 2025-07-15 17:13:42 -07:00 committed by GitHub
parent c7d1eae4fc
commit 2d6534b7da
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
16 changed files with 287 additions and 25 deletions

View File

@ -5734,6 +5734,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(BUILD_VERTICAL_VECTOR)
NODE_NAME_CASE(CONST_DATA_PTR)
NODE_NAME_CASE(PC_ADD_REL_OFFSET)
NODE_NAME_CASE(PC_ADD_REL_OFFSET64)
NODE_NAME_CASE(LDS)
NODE_NAME_CASE(DUMMY_CHAIN)
NODE_NAME_CASE(LOAD_D16_HI)

View File

@ -545,6 +545,7 @@ enum NodeType : unsigned {
/// Pointer to the start of the shader's constant data.
CONST_DATA_PTR,
PC_ADD_REL_OFFSET,
PC_ADD_REL_OFFSET64,
LDS,
DUMMY_CHAIN,

View File

@ -2932,14 +2932,22 @@ bool AMDGPULegalizerInfo::buildPCRelGlobalAddress(Register DstReg, LLT PtrTy,
Register PCReg = PtrTy.getSizeInBits() != 32 ? DstReg :
B.getMRI()->createGenericVirtualRegister(ConstPtrTy);
MachineInstrBuilder MIB = B.buildInstr(AMDGPU::SI_PC_ADD_REL_OFFSET)
.addDef(PCReg);
if (ST.has64BitLiterals()) {
assert(GAFlags != SIInstrInfo::MO_NONE);
MIB.addGlobalAddress(GV, Offset, GAFlags);
if (GAFlags == SIInstrInfo::MO_NONE)
MIB.addImm(0);
else
MIB.addGlobalAddress(GV, Offset, GAFlags + 1);
MachineInstrBuilder MIB =
B.buildInstr(AMDGPU::SI_PC_ADD_REL_OFFSET64).addDef(PCReg);
MIB.addGlobalAddress(GV, Offset, GAFlags + 2);
} else {
MachineInstrBuilder MIB =
B.buildInstr(AMDGPU::SI_PC_ADD_REL_OFFSET).addDef(PCReg);
MIB.addGlobalAddress(GV, Offset, GAFlags);
if (GAFlags == SIInstrInfo::MO_NONE)
MIB.addImm(0);
else
MIB.addGlobalAddress(GV, Offset, GAFlags + 1);
}
if (!B.getMRI()->getRegClassOrNull(PCReg))
B.getMRI()->setRegClass(PCReg, &AMDGPU::SReg_64RegClass);
@ -2955,6 +2963,15 @@ void AMDGPULegalizerInfo::buildAbsGlobalAddress(
MachineRegisterInfo &MRI) const {
bool RequiresHighHalf = PtrTy.getSizeInBits() != 32;
if (RequiresHighHalf && ST.has64BitLiterals()) {
if (!MRI.getRegClassOrNull(DstReg))
MRI.setRegClass(DstReg, &AMDGPU::SReg_64RegClass);
B.buildInstr(AMDGPU::S_MOV_B64)
.addDef(DstReg)
.addGlobalAddress(GV, 0, SIInstrInfo::MO_ABS64);
return;
}
LLT S32 = LLT::scalar(32);
// Use the destination directly, if and only if we store the lower address

View File

@ -50,6 +50,7 @@ static AMDGPUMCExpr::Specifier getSpecifier(unsigned MOFlags) {
default:
return AMDGPUMCExpr::S_None;
case SIInstrInfo::MO_GOTPCREL:
case SIInstrInfo::MO_GOTPCREL64:
return AMDGPUMCExpr::S_GOTPCREL;
case SIInstrInfo::MO_GOTPCREL32_LO:
return AMDGPUMCExpr::S_GOTPCREL32_LO;
@ -59,10 +60,14 @@ static AMDGPUMCExpr::Specifier getSpecifier(unsigned MOFlags) {
return AMDGPUMCExpr::S_REL32_LO;
case SIInstrInfo::MO_REL32_HI:
return AMDGPUMCExpr::S_REL32_HI;
case SIInstrInfo::MO_REL64:
return AMDGPUMCExpr::S_REL64;
case SIInstrInfo::MO_ABS32_LO:
return AMDGPUMCExpr::S_ABS32_LO;
case SIInstrInfo::MO_ABS32_HI:
return AMDGPUMCExpr::S_ABS32_HI;
case SIInstrInfo::MO_ABS64:
return AMDGPUMCExpr::S_ABS64;
}
}

View File

@ -64,6 +64,8 @@ unsigned AMDGPUELFObjectWriter::getRelocType(const MCFixup &Fixup,
return ELF::R_AMDGPU_ABS32_LO;
case AMDGPUMCExpr::S_ABS32_HI:
return ELF::R_AMDGPU_ABS32_HI;
case AMDGPUMCExpr::S_ABS64:
return ELF::R_AMDGPU_ABS64;
}
MCFixupKind Kind = Fixup.getKind();

View File

@ -25,6 +25,7 @@ const MCAsmInfo::AtSpecifier atSpecifiers[] = {
{AMDGPUMCExpr::S_REL64, "rel64"},
{AMDGPUMCExpr::S_ABS32_LO, "abs32@lo"},
{AMDGPUMCExpr::S_ABS32_HI, "abs32@hi"},
{AMDGPUMCExpr::S_ABS64, "abs64"},
};
AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Triple &TT,

View File

@ -564,7 +564,8 @@ static bool needsPCRel(const MCExpr *Expr) {
case MCExpr::SymbolRef: {
auto *SE = cast<MCSymbolRefExpr>(Expr);
auto Spec = AMDGPU::getSpecifier(SE);
return Spec != AMDGPUMCExpr::S_ABS32_LO && Spec != AMDGPUMCExpr::S_ABS32_HI;
return Spec != AMDGPUMCExpr::S_ABS32_LO &&
Spec != AMDGPUMCExpr::S_ABS32_HI && Spec != AMDGPUMCExpr::S_ABS64;
}
case MCExpr::Binary: {
auto *BE = cast<MCBinaryExpr>(Expr);
@ -687,7 +688,12 @@ void AMDGPUMCCodeEmitter::getMachineOpValueCommon(
const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
uint32_t Offset = Desc.getSize();
assert(Offset == 4 || Offset == 8);
addFixup(Fixups, Offset, MO.getExpr(), FK_Data_4, PCRel);
auto OpType = Desc.operands()[OpNo].OperandType;
MCFixupKind Kind = (STI.hasFeature(AMDGPU::Feature64BitLiterals) &&
OpType == AMDGPU::OPERAND_REG_IMM_INT64)
? FK_Data_8
: FK_Data_4;
addFixup(Fixups, Offset, MO.getExpr(), Kind, PCRel);
}
const MCInstrDesc &Desc = MCII.get(MI.getOpcode());

View File

@ -50,6 +50,7 @@ public:
S_REL64, // symbol@rel64
S_ABS32_LO, // symbol@abs32@lo
S_ABS32_HI, // symbol@abs32@hi
S_ABS64, // symbol@abs64
};
private:

View File

@ -8166,6 +8166,14 @@ buildPCRelGlobalAddress(SelectionDAG &DAG, const GlobalValue *GV,
// $symbol@*@hi with lower 32 bits and higher 32 bits of a literal constant,
// which is a 64-bit pc-relative offset from the encoding of the $symbol
// operand to the global variable.
if (DAG.getSubtarget<GCNSubtarget>().has64BitLiterals()) {
  // With 64-bit literals the whole pc-relative offset is carried by one i64
  // target global address feeding PC_ADD_REL_OFFSET64, so no separate lo/hi
  // operands are built on this path.
  assert(GAFlags != SIInstrInfo::MO_NONE);
  // GAFlags + 2 turns the incoming flag into its 64-bit counterpart. That
  // only holds while each 64-bit flag sits exactly two enumerators after the
  // matching *32_LO flag, so pin the layout at compile time.
  // NOTE(review): assumes callers pass the *32_LO-valued flag - confirm.
  static_assert(SIInstrInfo::MO_GOTPCREL32_LO + 2 == SIInstrInfo::MO_GOTPCREL64,
                "MO_GOTPCREL64 must be MO_GOTPCREL32_LO + 2");
  static_assert(SIInstrInfo::MO_REL32_LO + 2 == SIInstrInfo::MO_REL64,
                "MO_REL64 must be MO_REL32_LO + 2");
  SDValue Ptr =
      DAG.getTargetGlobalAddress(GV, DL, MVT::i64, Offset, GAFlags + 2);
  return DAG.getNode(AMDGPUISD::PC_ADD_REL_OFFSET64, DL, PtrVT, Ptr);
}
SDValue PtrLo = DAG.getTargetGlobalAddress(GV, DL, MVT::i32, Offset, GAFlags);
SDValue PtrHi;
if (GAFlags == SIInstrInfo::MO_NONE)
@ -8215,6 +8223,13 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
}
if (Subtarget->isAmdPalOS() || Subtarget->isMesa3DOS()) {
if (Subtarget->has64BitLiterals()) {
SDValue Addr = DAG.getTargetGlobalAddress(
GV, DL, MVT::i64, GSD->getOffset(), SIInstrInfo::MO_ABS64);
return SDValue(DAG.getMachineNode(AMDGPU::S_MOV_B64, DL, MVT::i64, Addr),
0);
}
SDValue AddrLo = DAG.getTargetGlobalAddress(
GV, DL, MVT::i32, GSD->getOffset(), SIInstrInfo::MO_ABS32_LO);
AddrLo = {DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, AddrLo), 0};

View File

@ -2498,6 +2498,25 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MI.eraseFromParent();
break;
}
case AMDGPU::SI_PC_ADD_REL_OFFSET64: {
  // Expand the pseudo into S_GETPC_B64 followed by one S_ADD_U64 of the
  // symbol operand, both writing the pseudo's destination register.
  MachineFunction &Fn = *MBB.getParent();
  const Register DstReg = MI.getOperand(0).getReg();

  // Take a copy of the symbol operand so its offset can be adjusted without
  // touching the pseudo itself.
  MachineOperand SymOp = MI.getOperand(1);
  if (SymOp.isGlobal()) {
    // NOTE(review): the extra 4 presumably accounts for the literal sitting
    // 4 bytes into the S_ADD_U64 encoding relative to the PC value read by
    // S_GETPC_B64 - confirm.
    SymOp.setOffset(SymOp.getOffset() + 4);
  }

  auto GetPC = BuildMI(Fn, DL, get(AMDGPU::S_GETPC_B64), DstReg);
  auto AddOffset =
      BuildMI(Fn, DL, get(AMDGPU::S_ADD_U64), DstReg).addReg(DstReg).add(SymOp);

  // Keep the pair in a bundle so the post-RA scheduler cannot re-order them.
  MIBundleBuilder Bundler(MBB, MI);
  Bundler.append(GetPC);
  Bundler.append(AddOffset);
  finalizeBundle(MBB, Bundler.begin());

  MI.eraseFromParent();
  break;
}
case AMDGPU::ENTER_STRICT_WWM: {
// This only gets its own opcode so that SIPreAllocateWWMRegs can tell when
// Whole Wave Mode is entered.
@ -9315,13 +9334,16 @@ SIInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
ArrayRef<std::pair<unsigned, const char *>>
SIInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
static const std::pair<unsigned, const char *> TargetFlags[] = {
{ MO_GOTPCREL, "amdgpu-gotprel" },
{ MO_GOTPCREL32_LO, "amdgpu-gotprel32-lo" },
{ MO_GOTPCREL32_HI, "amdgpu-gotprel32-hi" },
{ MO_REL32_LO, "amdgpu-rel32-lo" },
{ MO_REL32_HI, "amdgpu-rel32-hi" },
{ MO_ABS32_LO, "amdgpu-abs32-lo" },
{ MO_ABS32_HI, "amdgpu-abs32-hi" },
{MO_GOTPCREL, "amdgpu-gotprel"},
{MO_GOTPCREL32_LO, "amdgpu-gotprel32-lo"},
{MO_GOTPCREL32_HI, "amdgpu-gotprel32-hi"},
{MO_GOTPCREL64, "amdgpu-gotprel64"},
{MO_REL32_LO, "amdgpu-rel32-lo"},
{MO_REL32_HI, "amdgpu-rel32-hi"},
{MO_REL64, "amdgpu-rel64"},
{MO_ABS32_LO, "amdgpu-abs32-lo"},
{MO_ABS32_HI, "amdgpu-abs32-hi"},
{MO_ABS64, "amdgpu-abs64"},
};
return ArrayRef(TargetFlags);

View File

@ -214,16 +214,20 @@ public:
MO_GOTPCREL32_LO = 2,
// MO_GOTPCREL32_HI -> symbol@gotpcrel32@hi -> R_AMDGPU_GOTPCREL32_HI.
MO_GOTPCREL32_HI = 3,
// MO_GOTPCREL64 -> symbol@GOTPCREL -> R_AMDGPU_GOTPCREL.
MO_GOTPCREL64 = 4,
// MO_REL32_LO -> symbol@rel32@lo -> R_AMDGPU_REL32_LO.
MO_REL32 = 4,
MO_REL32_LO = 4,
MO_REL32 = 5,
MO_REL32_LO = 5,
// MO_REL32_HI -> symbol@rel32@hi -> R_AMDGPU_REL32_HI.
MO_REL32_HI = 5,
MO_REL32_HI = 6,
MO_REL64 = 7,
MO_FAR_BRANCH_OFFSET = 6,
MO_FAR_BRANCH_OFFSET = 8,
MO_ABS32_LO = 8,
MO_ABS32_HI = 9,
MO_ABS32_LO = 9,
MO_ABS32_HI = 10,
MO_ABS64 = 11,
};
explicit SIInstrInfo(const GCNSubtarget &ST);

View File

@ -268,6 +268,10 @@ def SIpc_add_rel_offset : SDNode<"AMDGPUISD::PC_ADD_REL_OFFSET",
SDTypeProfile<1, 2, [SDTCisVT<0, iPTR>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>
>;
// SelectionDAG node for AMDGPUISD::PC_ADD_REL_OFFSET64. Unlike
// SIpc_add_rel_offset, which takes two operands, this node takes a single
// operand; the result is pointer-typed (iPTR) and the operand must have the
// same type as the result.
def SIpc_add_rel_offset64 : SDNode<"AMDGPUISD::PC_ADD_REL_OFFSET64",
SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisSameAs<0,1>]>
>;
def SIlds : SDNode<"AMDGPUISD::LDS",
SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisSameAs<0,1>]>
>;

View File

@ -1144,6 +1144,14 @@ def : GCNPat <
(SI_PC_ADD_REL_OFFSET $ptr_lo, (i32 0))
>;
// Pseudo instruction that produces a 64-bit SGPR value from a single
// global-address operand. It is selected from SIpc_add_rel_offset64 applied
// to a tglobaladdr and is only available when Has64BitLiterals holds.
def SI_PC_ADD_REL_OFFSET64 : SPseudoInstSI <
(outs SReg_64:$dst),
(ins si_ga:$ptr),
[(set SReg_64:$dst,
(i64 (SIpc_add_rel_offset64 tglobaladdr:$ptr)))]> {
let SubtargetPredicate = Has64BitLiterals;
}
def : GCNPat<
(AMDGPUtrap timm:$trapid),
(S_TRAP $trapid)

View File

@ -0,0 +1,138 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 --verify-machineinstrs < %s | FileCheck -check-prefix=GFX11-PAL-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 --verify-machineinstrs < %s | FileCheck -check-prefix=GFX11-PAL-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1250 < %s | FileCheck -check-prefix=GFX1250-PAL %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1250 < %s | FileCheck -check-prefix=GFX1250-PAL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 --verify-machineinstrs < %s | FileCheck -check-prefix=GFX11-HSA %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 --verify-machineinstrs < %s | FileCheck -check-prefix=GFX11-HSA %s
; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 < %s | FileCheck -check-prefix=GFX1250-HSA %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 < %s | FileCheck -check-prefix=GFX1250-HSA %s
; Kernel making a direct call to the amdgpu_gfx function @internal_func. The
; checks below record how the callee address is formed in each configuration:
; an s_mov_b32 pair with @abs32@lo/@abs32@hi (gfx1100, amdpal), one s_mov_b64
; with @abs64 (gfx1250, amdpal), s_getpc_b64 plus @gotpcrel32@lo/@hi adds and
; a load (gfx1100, amdhsa), and s_get_pc_i64 plus a single s_add_nc_u64 with
; @gotpcrel and a load (gfx1250, amdhsa).
define amdgpu_kernel void @caller_internal() {
; GFX11-PAL-SDAG-LABEL: caller_internal:
; GFX11-PAL-SDAG: ; %bb.0:
; GFX11-PAL-SDAG-NEXT: s_mov_b32 s1, internal_func@abs32@hi
; GFX11-PAL-SDAG-NEXT: s_mov_b32 s0, internal_func@abs32@lo
; GFX11-PAL-SDAG-NEXT: s_mov_b32 s32, 0
; GFX11-PAL-SDAG-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX11-PAL-SDAG-NEXT: s_endpgm
;
; GFX11-PAL-GISEL-LABEL: caller_internal:
; GFX11-PAL-GISEL: ; %bb.0:
; GFX11-PAL-GISEL-NEXT: s_mov_b32 s0, internal_func@abs32@lo
; GFX11-PAL-GISEL-NEXT: s_mov_b32 s1, internal_func@abs32@hi
; GFX11-PAL-GISEL-NEXT: s_mov_b32 s32, 0
; GFX11-PAL-GISEL-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX11-PAL-GISEL-NEXT: s_endpgm
;
; GFX1250-PAL-LABEL: caller_internal:
; GFX1250-PAL: ; %bb.0:
; GFX1250-PAL-NEXT: s_mov_b64 s[0:1], internal_func@abs64
; GFX1250-PAL-NEXT: s_mov_b32 s32, 0
; GFX1250-PAL-NEXT: s_swap_pc_i64 s[30:31], s[0:1]
; GFX1250-PAL-NEXT: s_endpgm
;
; GFX11-HSA-LABEL: caller_internal:
; GFX11-HSA: ; %bb.0:
; GFX11-HSA-NEXT: s_getpc_b64 s[0:1]
; GFX11-HSA-NEXT: s_add_u32 s0, s0, internal_func@gotpcrel32@lo+4
; GFX11-HSA-NEXT: s_addc_u32 s1, s1, internal_func@gotpcrel32@hi+12
; GFX11-HSA-NEXT: s_mov_b32 s32, 0
; GFX11-HSA-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-HSA-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-HSA-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX11-HSA-NEXT: s_endpgm
;
; GFX1250-HSA-LABEL: caller_internal:
; GFX1250-HSA: ; %bb.0:
; GFX1250-HSA-NEXT: s_get_pc_i64 s[0:1]
; GFX1250-HSA-NEXT: s_add_nc_u64 s[0:1], s[0:1], internal_func@gotpcrel+4
; GFX1250-HSA-NEXT: s_mov_b32 s32, 0
; GFX1250-HSA-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1250-HSA-NEXT: s_wait_kmcnt 0x0
; GFX1250-HSA-NEXT: s_swap_pc_i64 s[30:31], s[0:1]
; GFX1250-HSA-NEXT: s_endpgm
call amdgpu_gfx void @internal_func()
ret void
}
; Kernel making a direct call to @external_func. The checks below record how
; the callee address is formed in each configuration: an s_mov_b32 pair with
; @abs32@lo/@abs32@hi (gfx1100, amdpal), one s_mov_b64 with @abs64 (gfx1250,
; amdpal), s_getpc_b64 plus @rel32@lo/@rel32@hi adds (gfx1100, amdhsa), and
; s_get_pc_i64 plus a single s_add_nc_u64 with @rel64 (gfx1250, amdhsa).
define amdgpu_kernel void @caller_exterinal() {
; GFX11-PAL-SDAG-LABEL: caller_exterinal:
; GFX11-PAL-SDAG: ; %bb.0:
; GFX11-PAL-SDAG-NEXT: v_mov_b32_e32 v31, v0
; GFX11-PAL-SDAG-NEXT: s_mov_b32 s12, s13
; GFX11-PAL-SDAG-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX11-PAL-SDAG-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX11-PAL-SDAG-NEXT: s_mov_b32 s17, external_func@abs32@hi
; GFX11-PAL-SDAG-NEXT: s_mov_b32 s16, external_func@abs32@lo
; GFX11-PAL-SDAG-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX11-PAL-SDAG-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX11-PAL-SDAG-NEXT: s_mov_b32 s13, s14
; GFX11-PAL-SDAG-NEXT: s_mov_b32 s14, s15
; GFX11-PAL-SDAG-NEXT: s_mov_b32 s32, 0
; GFX11-PAL-SDAG-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX11-PAL-SDAG-NEXT: s_endpgm
;
; GFX11-PAL-GISEL-LABEL: caller_exterinal:
; GFX11-PAL-GISEL: ; %bb.0:
; GFX11-PAL-GISEL-NEXT: v_mov_b32_e32 v31, v0
; GFX11-PAL-GISEL-NEXT: s_mov_b32 s12, s13
; GFX11-PAL-GISEL-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX11-PAL-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX11-PAL-GISEL-NEXT: s_mov_b32 s16, external_func@abs32@lo
; GFX11-PAL-GISEL-NEXT: s_mov_b32 s17, external_func@abs32@hi
; GFX11-PAL-GISEL-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX11-PAL-GISEL-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX11-PAL-GISEL-NEXT: s_mov_b32 s13, s14
; GFX11-PAL-GISEL-NEXT: s_mov_b32 s14, s15
; GFX11-PAL-GISEL-NEXT: s_mov_b32 s32, 0
; GFX11-PAL-GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX11-PAL-GISEL-NEXT: s_endpgm
;
; GFX1250-PAL-LABEL: caller_exterinal:
; GFX1250-PAL: ; %bb.0:
; GFX1250-PAL-NEXT: v_mov_b32_e32 v31, v0
; GFX1250-PAL-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1250-PAL-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX1250-PAL-NEXT: s_mov_b64 s[12:13], external_func@abs64
; GFX1250-PAL-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1250-PAL-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1250-PAL-NEXT: s_mov_b32 s32, 0
; GFX1250-PAL-NEXT: s_swap_pc_i64 s[30:31], s[12:13]
; GFX1250-PAL-NEXT: s_endpgm
;
; GFX11-HSA-LABEL: caller_exterinal:
; GFX11-HSA: ; %bb.0:
; GFX11-HSA-NEXT: v_mov_b32_e32 v31, v0
; GFX11-HSA-NEXT: s_mov_b32 s12, s13
; GFX11-HSA-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX11-HSA-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX11-HSA-NEXT: s_getpc_b64 s[16:17]
; GFX11-HSA-NEXT: s_add_u32 s16, s16, external_func@rel32@lo+4
; GFX11-HSA-NEXT: s_addc_u32 s17, s17, external_func@rel32@hi+12
; GFX11-HSA-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX11-HSA-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX11-HSA-NEXT: s_mov_b32 s13, s14
; GFX11-HSA-NEXT: s_mov_b32 s14, s15
; GFX11-HSA-NEXT: s_mov_b32 s32, 0
; GFX11-HSA-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX11-HSA-NEXT: s_endpgm
;
; GFX1250-HSA-LABEL: caller_exterinal:
; GFX1250-HSA: ; %bb.0:
; GFX1250-HSA-NEXT: v_mov_b32_e32 v31, v0
; GFX1250-HSA-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1250-HSA-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX1250-HSA-NEXT: s_get_pc_i64 s[12:13]
; GFX1250-HSA-NEXT: s_add_nc_u64 s[12:13], s[12:13], external_func@rel64+4
; GFX1250-HSA-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1250-HSA-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1250-HSA-NEXT: s_mov_b32 s32, 0
; GFX1250-HSA-NEXT: s_swap_pc_i64 s[30:31], s[12:13]
; GFX1250-HSA-NEXT: s_endpgm
call void @external_func()
ret void
}
declare amdgpu_gfx void @internal_func()
declare hidden void @external_func()

View File

@ -22,13 +22,23 @@ body: |
bb.0:
liveins: $sgpr0_sgpr1
; CHECK-LABEL: name: flags
; CHECK: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @foo + 4, target-flags(amdgpu-rel32-hi) @foo + 12, implicit-def dead $scc
; CHECK: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 target-flags(amdgpu-gotprel) @foo
; CHECK: S_ENDPGM 0
; CHECK: liveins: $sgpr0_sgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @foo + 4, target-flags(amdgpu-rel32-hi) @foo + 12, implicit-def dead $scc
; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 target-flags(amdgpu-gotprel) @foo
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @foo
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @foo
; CHECK-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 target-flags(amdgpu-abs64) @foo
; CHECK-NEXT: [[S_MOV_B64_2:%[0-9]+]]:sreg_64 = S_MOV_B64 target-flags(amdgpu-rel64) @foo
; CHECK-NEXT: [[S_MOV_B64_3:%[0-9]+]]:sreg_64 = S_MOV_B64 target-flags(amdgpu-gotprel64) @foo
; CHECK-NEXT: S_ENDPGM 0
%0 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @foo + 4, target-flags(amdgpu-rel32-hi) @foo + 12, implicit-def dead $scc
%1 = S_MOV_B64 target-flags(amdgpu-gotprel) @foo
%2:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @foo
%3:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @foo
%4:sreg_64 = S_MOV_B64 target-flags(amdgpu-abs64) @foo
%5:sreg_64 = S_MOV_B64 target-flags(amdgpu-rel64) @foo
%6:sreg_64 = S_MOV_B64 target-flags(amdgpu-gotprel64) @foo
S_ENDPGM 0
...

View File

@ -0,0 +1,27 @@
// Assembler test for label-difference and symbolic literal operands. On
// gfx1250 the s_mov_b64 forms are expected to carry an 8-byte literal with an
// FK_Data_8 fixup (FK_PCRel_8 for @rel64), s_mov_b32 keeps a 4-byte FK_Data_4
// fixup, and tahiti is expected to reject the s_mov_b64 forms.
// RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck --check-prefix=SI %s
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s | FileCheck --check-prefix=GFX1250 %s
// RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck --check-prefix=SI-ERR --implicit-check-not=error: --strict-whitespace %s
.LL1:
.LL2:
s_mov_b64 vcc, .LL2-.LL1
// GFX1250: s_mov_b64 vcc, .LL2-.LL1 ; encoding: [0xfe,0x01,0xea,0xbe,A,A,A,A,A,A,A,A]
// GFX1250-NEXT: ; fixup A - offset: 4, value: .LL2-.LL1, kind: FK_Data_8
// SI-ERR: :[[@LINE-3]]:{{[0-9]+}}: error: invalid operand for instruction
s_mov_b32 s0, .LL2-.LL1
// SI: s_mov_b32 s0, .LL2-.LL1 ; encoding: [0xff,0x03,0x80,0xbe,A,A,A,A]
// SI-NEXT: ; fixup A - offset: 4, value: .LL2-.LL1, kind: FK_Data_4
// GFX1250: s_mov_b32 s0, .LL2-.LL1 ; encoding: [0xff,0x00,0x80,0xbe,A,A,A,A]
// GFX1250-NEXT: ; fixup A - offset: 4, value: .LL2-.LL1, kind: FK_Data_4
s_mov_b64 s[0:1], sym@abs64
// GFX1250: s_mov_b64 s[0:1], sym@abs64 ; encoding: [0xfe,0x01,0x80,0xbe,A,A,A,A,A,A,A,A]
// GFX1250-NEXT: ; fixup A - offset: 4, value: sym@abs64, kind: FK_Data_8
// SI-ERR: :[[@LINE-3]]:{{[0-9]+}}: error: invalid operand for instruction
s_mov_b64 s[0:1], callee@rel64
// GFX1250: s_mov_b64 s[0:1], callee@rel64 ; encoding: [0xfe,0x01,0x80,0xbe,A,A,A,A,A,A,A,A]
// GFX1250-NEXT: ; fixup A - offset: 4, value: callee@rel64, kind: FK_PCRel_8
// SI-ERR: :[[@LINE-3]]:{{[0-9]+}}: error: invalid operand for instruction