Compare commits
5 Commits
main
...
users/rovk
Author | SHA1 | Date | |
---|---|---|---|
![]() |
d39acfa3ba | ||
![]() |
b0fb6a7e2b | ||
![]() |
20b30e00a6 | ||
![]() |
36019142eb | ||
![]() |
ff93c766ca |
@ -7975,12 +7975,19 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
|
|||||||
}
|
}
|
||||||
case Intrinsic::amdgcn_call_whole_wave: {
|
case Intrinsic::amdgcn_call_whole_wave: {
|
||||||
TargetLowering::ArgListTy Args;
|
TargetLowering::ArgListTy Args;
|
||||||
|
bool isTailCall = I.isTailCall();
|
||||||
|
|
||||||
// The first argument is the callee. Skip it when assembling the call args.
|
// The first argument is the callee. Skip it when assembling the call args.
|
||||||
for (unsigned Idx = 1; Idx < I.arg_size(); ++Idx) {
|
for (unsigned Idx = 1; Idx < I.arg_size(); ++Idx) {
|
||||||
TargetLowering::ArgListEntry Arg(getValue(I.getArgOperand(Idx)),
|
TargetLowering::ArgListEntry Arg(getValue(I.getArgOperand(Idx)),
|
||||||
I.getArgOperand(Idx)->getType());
|
I.getArgOperand(Idx)->getType());
|
||||||
Arg.setAttributes(&I, Idx);
|
Arg.setAttributes(&I, Idx);
|
||||||
|
|
||||||
|
// If we have an explicit sret argument that is an Instruction, (i.e., it
|
||||||
|
// might point to function-local memory), we can't meaningfully tail-call.
|
||||||
|
if (Arg.IsSRet && isa<Instruction>(I.getArgOperand(Idx)))
|
||||||
|
isTailCall = false;
|
||||||
|
|
||||||
Args.push_back(Arg);
|
Args.push_back(Arg);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -7995,7 +8002,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
|
|||||||
.setChain(getRoot())
|
.setChain(getRoot())
|
||||||
.setCallee(CallingConv::AMDGPU_Gfx_WholeWave, I.getType(),
|
.setCallee(CallingConv::AMDGPU_Gfx_WholeWave, I.getType(),
|
||||||
getValue(I.getArgOperand(0)), std::move(Args))
|
getValue(I.getArgOperand(0)), std::move(Args))
|
||||||
.setTailCall(false)
|
.setTailCall(isTailCall && canTailCall(I))
|
||||||
.setIsPreallocated(
|
.setIsPreallocated(
|
||||||
I.countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0)
|
I.countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0)
|
||||||
.setConvergent(I.isConvergent())
|
.setConvergent(I.isConvergent())
|
||||||
@ -8902,6 +8909,29 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
|
|||||||
return Result;
|
return Result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool SelectionDAGBuilder::canTailCall(const CallBase &CB) const {
|
||||||
|
bool isMustTailCall = CB.isMustTailCall();
|
||||||
|
|
||||||
|
// Avoid emitting tail calls in functions with the disable-tail-calls
|
||||||
|
// attribute.
|
||||||
|
const Function *Caller = CB.getParent()->getParent();
|
||||||
|
if (Caller->getFnAttribute("disable-tail-calls").getValueAsString() ==
|
||||||
|
"true" &&
|
||||||
|
!isMustTailCall)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// We can't tail call inside a function with a swifterror argument. Lowering
|
||||||
|
// does not support this yet. It would have to move into the swifterror
|
||||||
|
// register before the call.
|
||||||
|
if (DAG.getTargetLoweringInfo().supportSwiftError() &&
|
||||||
|
Caller->getAttributes().hasAttrSomewhere(Attribute::SwiftError))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// Check if target-independent constraints permit a tail call here.
|
||||||
|
// Target-dependent constraints are checked within TLI->LowerCallTo.
|
||||||
|
return isInTailCallPosition(CB, DAG.getTarget());
|
||||||
|
}
|
||||||
|
|
||||||
void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee,
|
void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee,
|
||||||
bool isTailCall, bool isMustTailCall,
|
bool isTailCall, bool isMustTailCall,
|
||||||
const BasicBlock *EHPadBB,
|
const BasicBlock *EHPadBB,
|
||||||
@ -8916,21 +8946,8 @@ void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee,
|
|||||||
const Value *SwiftErrorVal = nullptr;
|
const Value *SwiftErrorVal = nullptr;
|
||||||
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
|
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
|
||||||
|
|
||||||
if (isTailCall) {
|
if (isTailCall)
|
||||||
// Avoid emitting tail calls in functions with the disable-tail-calls
|
isTailCall = canTailCall(CB);
|
||||||
// attribute.
|
|
||||||
auto *Caller = CB.getParent()->getParent();
|
|
||||||
if (Caller->getFnAttribute("disable-tail-calls").getValueAsString() ==
|
|
||||||
"true" && !isMustTailCall)
|
|
||||||
isTailCall = false;
|
|
||||||
|
|
||||||
// We can't tail call inside a function with a swifterror argument. Lowering
|
|
||||||
// does not support this yet. It would have to move into the swifterror
|
|
||||||
// register before the call.
|
|
||||||
if (TLI.supportSwiftError() &&
|
|
||||||
Caller->getAttributes().hasAttrSomewhere(Attribute::SwiftError))
|
|
||||||
isTailCall = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (auto I = CB.arg_begin(), E = CB.arg_end(); I != E; ++I) {
|
for (auto I = CB.arg_begin(), E = CB.arg_end(); I != E; ++I) {
|
||||||
const Value *V = *I;
|
const Value *V = *I;
|
||||||
@ -8970,11 +8987,6 @@ void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee,
|
|||||||
Args.push_back(Entry);
|
Args.push_back(Entry);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check if target-independent constraints permit a tail call here.
|
|
||||||
// Target-dependent constraints are checked within TLI->LowerCallTo.
|
|
||||||
if (isTailCall && !isInTailCallPosition(CB, DAG.getTarget()))
|
|
||||||
isTailCall = false;
|
|
||||||
|
|
||||||
// Disable tail calls if there is an swifterror argument. Targets have not
|
// Disable tail calls if there is an swifterror argument. Targets have not
|
||||||
// been updated to support tail calls.
|
// been updated to support tail calls.
|
||||||
if (TLI.supportSwiftError() && SwiftErrorVal)
|
if (TLI.supportSwiftError() && SwiftErrorVal)
|
||||||
|
@ -408,6 +408,10 @@ public:
|
|||||||
bool IsMustTailCall, const BasicBlock *EHPadBB = nullptr,
|
bool IsMustTailCall, const BasicBlock *EHPadBB = nullptr,
|
||||||
const TargetLowering::PtrAuthInfo *PAI = nullptr);
|
const TargetLowering::PtrAuthInfo *PAI = nullptr);
|
||||||
|
|
||||||
|
// Check some of the target-independent constraints for tail calls. This does
|
||||||
|
// not iterate over the call arguments.
|
||||||
|
bool canTailCall(const CallBase &CB) const;
|
||||||
|
|
||||||
// Lower range metadata from 0 to N to assert zext to an integer of nearest
|
// Lower range metadata from 0 to N to assert zext to an integer of nearest
|
||||||
// floor power of two.
|
// floor power of two.
|
||||||
SDValue lowerRangeToAssertZExt(SelectionDAG &DAG, const Instruction &I,
|
SDValue lowerRangeToAssertZExt(SelectionDAG &DAG, const Instruction &I,
|
||||||
|
@ -1004,8 +1004,14 @@ static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect,
|
|||||||
return IsWave32 ? AMDGPU::SI_CS_CHAIN_TC_W32 : AMDGPU::SI_CS_CHAIN_TC_W64;
|
return IsWave32 ? AMDGPU::SI_CS_CHAIN_TC_W32 : AMDGPU::SI_CS_CHAIN_TC_W64;
|
||||||
}
|
}
|
||||||
|
|
||||||
return CC == CallingConv::AMDGPU_Gfx ? AMDGPU::SI_TCRETURN_GFX :
|
if (CallerF.getFunction().getCallingConv() ==
|
||||||
AMDGPU::SI_TCRETURN;
|
CallingConv::AMDGPU_Gfx_WholeWave)
|
||||||
|
return AMDGPU::SI_TCRETURN_GFX_WholeWave;
|
||||||
|
|
||||||
|
if (CC == CallingConv::AMDGPU_Gfx || CC == CallingConv::AMDGPU_Gfx_WholeWave)
|
||||||
|
return AMDGPU::SI_TCRETURN_GFX;
|
||||||
|
|
||||||
|
return AMDGPU::SI_TCRETURN;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add operands to call instruction to track the callee.
|
// Add operands to call instruction to track the callee.
|
||||||
@ -1284,6 +1290,13 @@ bool AMDGPUCallLowering::lowerTailCall(
|
|||||||
unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), /*IsTailCall*/ true,
|
unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), /*IsTailCall*/ true,
|
||||||
ST.isWave32(), CalleeCC, IsDynamicVGPRChainCall);
|
ST.isWave32(), CalleeCC, IsDynamicVGPRChainCall);
|
||||||
auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
|
auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
|
||||||
|
|
||||||
|
if (FuncInfo->isWholeWaveFunction())
|
||||||
|
addOriginalExecToReturn(MF, MIB);
|
||||||
|
|
||||||
|
// Keep track of the index of the next operand to be added to the call
|
||||||
|
unsigned CalleeIdx = MIB->getNumOperands();
|
||||||
|
|
||||||
if (!addCallTargetOperands(MIB, MIRBuilder, Info, IsDynamicVGPRChainCall))
|
if (!addCallTargetOperands(MIB, MIRBuilder, Info, IsDynamicVGPRChainCall))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
@ -1401,7 +1414,7 @@ bool AMDGPUCallLowering::lowerTailCall(
|
|||||||
// If we have -tailcallopt, we need to adjust the stack. We'll do the call
|
// If we have -tailcallopt, we need to adjust the stack. We'll do the call
|
||||||
// sequence start and end here.
|
// sequence start and end here.
|
||||||
if (!IsSibCall) {
|
if (!IsSibCall) {
|
||||||
MIB->getOperand(1).setImm(FPDiff);
|
MIB->getOperand(CalleeIdx + 1).setImm(FPDiff);
|
||||||
CallSeqStart.addImm(NumBytes).addImm(0);
|
CallSeqStart.addImm(NumBytes).addImm(0);
|
||||||
// End the call sequence *before* emitting the call. Normally, we would
|
// End the call sequence *before* emitting the call. Normally, we would
|
||||||
// tidy the frame up after the call. However, here, we've laid out the
|
// tidy the frame up after the call. However, here, we've laid out the
|
||||||
@ -1413,16 +1426,24 @@ bool AMDGPUCallLowering::lowerTailCall(
|
|||||||
// Now we can add the actual call instruction to the correct basic block.
|
// Now we can add the actual call instruction to the correct basic block.
|
||||||
MIRBuilder.insertInstr(MIB);
|
MIRBuilder.insertInstr(MIB);
|
||||||
|
|
||||||
|
// If this is a whole wave tail call, we need to constrain the register for
|
||||||
|
// the original EXEC.
|
||||||
|
if (MIB->getOpcode() == AMDGPU::SI_TCRETURN_GFX_WholeWave) {
|
||||||
|
MIB->getOperand(0).setReg(
|
||||||
|
constrainOperandRegClass(MF, *TRI, MRI, *TII, *ST.getRegBankInfo(),
|
||||||
|
*MIB, MIB->getDesc(), MIB->getOperand(0), 0));
|
||||||
|
}
|
||||||
|
|
||||||
// If Callee is a reg, since it is used by a target specific
|
// If Callee is a reg, since it is used by a target specific
|
||||||
// instruction, it must have a register class matching the
|
// instruction, it must have a register class matching the
|
||||||
// constraint of that instruction.
|
// constraint of that instruction.
|
||||||
|
|
||||||
// FIXME: We should define regbankselectable call instructions to handle
|
// FIXME: We should define regbankselectable call instructions to handle
|
||||||
// divergent call targets.
|
// divergent call targets.
|
||||||
if (MIB->getOperand(0).isReg()) {
|
if (MIB->getOperand(CalleeIdx).isReg()) {
|
||||||
MIB->getOperand(0).setReg(
|
MIB->getOperand(CalleeIdx).setReg(constrainOperandRegClass(
|
||||||
constrainOperandRegClass(MF, *TRI, MRI, *TII, *ST.getRegBankInfo(),
|
MF, *TRI, MRI, *TII, *ST.getRegBankInfo(), *MIB, MIB->getDesc(),
|
||||||
*MIB, MIB->getDesc(), MIB->getOperand(0), 0));
|
MIB->getOperand(CalleeIdx), CalleeIdx));
|
||||||
}
|
}
|
||||||
|
|
||||||
MF.getFrameInfo().setHasTailCall();
|
MF.getFrameInfo().setHasTailCall();
|
||||||
|
@ -5646,6 +5646,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|||||||
NODE_NAME_CASE(CALL)
|
NODE_NAME_CASE(CALL)
|
||||||
NODE_NAME_CASE(TC_RETURN)
|
NODE_NAME_CASE(TC_RETURN)
|
||||||
NODE_NAME_CASE(TC_RETURN_GFX)
|
NODE_NAME_CASE(TC_RETURN_GFX)
|
||||||
|
NODE_NAME_CASE(TC_RETURN_GFX_WholeWave)
|
||||||
NODE_NAME_CASE(TC_RETURN_CHAIN)
|
NODE_NAME_CASE(TC_RETURN_CHAIN)
|
||||||
NODE_NAME_CASE(TC_RETURN_CHAIN_DVGPR)
|
NODE_NAME_CASE(TC_RETURN_CHAIN_DVGPR)
|
||||||
NODE_NAME_CASE(TRAP)
|
NODE_NAME_CASE(TRAP)
|
||||||
|
@ -412,6 +412,7 @@ enum NodeType : unsigned {
|
|||||||
CALL,
|
CALL,
|
||||||
TC_RETURN,
|
TC_RETURN,
|
||||||
TC_RETURN_GFX,
|
TC_RETURN_GFX,
|
||||||
|
TC_RETURN_GFX_WholeWave,
|
||||||
TC_RETURN_CHAIN,
|
TC_RETURN_CHAIN,
|
||||||
TC_RETURN_CHAIN_DVGPR,
|
TC_RETURN_CHAIN_DVGPR,
|
||||||
TRAP,
|
TRAP,
|
||||||
|
@ -94,6 +94,10 @@ def AMDGPUtc_return_gfx: SDNode<"AMDGPUISD::TC_RETURN_GFX", AMDGPUTCReturnTP,
|
|||||||
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
|
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
|
||||||
>;
|
>;
|
||||||
|
|
||||||
|
def AMDGPUtc_return_gfx_ww: SDNode<"AMDGPUISD::TC_RETURN_GFX_WholeWave", AMDGPUTCReturnTP,
|
||||||
|
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
|
||||||
|
>;
|
||||||
|
|
||||||
def AMDGPUtc_return_chain: SDNode<"AMDGPUISD::TC_RETURN_CHAIN",
|
def AMDGPUtc_return_chain: SDNode<"AMDGPUISD::TC_RETURN_CHAIN",
|
||||||
SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
|
SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
|
||||||
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
|
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
|
||||||
|
@ -1132,9 +1132,18 @@ void SIFrameLowering::emitCSRSpillRestores(
|
|||||||
RestoreWWMRegisters(WWMCalleeSavedRegs);
|
RestoreWWMRegisters(WWMCalleeSavedRegs);
|
||||||
|
|
||||||
// The original EXEC is the first operand of the return instruction.
|
// The original EXEC is the first operand of the return instruction.
|
||||||
const MachineInstr &Return = MBB.instr_back();
|
MachineInstr &Return = MBB.instr_back();
|
||||||
assert(Return.getOpcode() == AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN &&
|
unsigned Opcode = Return.getOpcode();
|
||||||
"Unexpected return inst");
|
switch (Opcode) {
|
||||||
|
case AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN:
|
||||||
|
Opcode = AMDGPU::SI_RETURN;
|
||||||
|
break;
|
||||||
|
case AMDGPU::SI_TCRETURN_GFX_WholeWave:
|
||||||
|
Opcode = AMDGPU::SI_TCRETURN_GFX;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
llvm_unreachable("Unexpected return inst");
|
||||||
|
}
|
||||||
Register OrigExec = Return.getOperand(0).getReg();
|
Register OrigExec = Return.getOperand(0).getReg();
|
||||||
|
|
||||||
if (!WWMScratchRegs.empty()) {
|
if (!WWMScratchRegs.empty()) {
|
||||||
@ -1148,6 +1157,11 @@ void SIFrameLowering::emitCSRSpillRestores(
|
|||||||
// Restore original EXEC.
|
// Restore original EXEC.
|
||||||
unsigned MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
|
unsigned MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
|
||||||
BuildMI(MBB, MBBI, DL, TII->get(MovOpc), TRI.getExec()).addReg(OrigExec);
|
BuildMI(MBB, MBBI, DL, TII->get(MovOpc), TRI.getExec()).addReg(OrigExec);
|
||||||
|
|
||||||
|
// Drop the first operand and update the opcode.
|
||||||
|
Return.removeOperand(0);
|
||||||
|
Return.setDesc(TII->get(Opcode));
|
||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4260,6 +4260,11 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If the caller is a whole wave function, we need to use a special opcode
|
||||||
|
// so we can patch up EXEC.
|
||||||
|
if (Info->isWholeWaveFunction())
|
||||||
|
OPC = AMDGPUISD::TC_RETURN_GFX_WholeWave;
|
||||||
|
|
||||||
return DAG.getNode(OPC, DL, MVT::Other, Ops);
|
return DAG.getNode(OPC, DL, MVT::Other, Ops);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -6023,14 +6028,15 @@ SITargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
|
|||||||
MI.eraseFromParent();
|
MI.eraseFromParent();
|
||||||
return SplitBB;
|
return SplitBB;
|
||||||
}
|
}
|
||||||
|
case AMDGPU::SI_TCRETURN_GFX_WholeWave:
|
||||||
case AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN: {
|
case AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN: {
|
||||||
assert(MFI->isWholeWaveFunction());
|
assert(MFI->isWholeWaveFunction());
|
||||||
|
|
||||||
// During ISel, it's difficult to propagate the original EXEC mask to use as
|
// During ISel, it's difficult to propagate the original EXEC mask to use as
|
||||||
// an input to SI_WHOLE_WAVE_FUNC_RETURN. Set it up here instead.
|
// an input to SI_WHOLE_WAVE_FUNC_RETURN. Set it up here instead.
|
||||||
MachineInstr *Setup = TII->getWholeWaveFunctionSetup(*BB->getParent());
|
MachineInstr *Setup = TII->getWholeWaveFunctionSetup(*BB->getParent());
|
||||||
Register OriginalExec = Setup->getOperand(0).getReg();
|
|
||||||
assert(Setup && "Couldn't find SI_SETUP_WHOLE_WAVE_FUNC");
|
assert(Setup && "Couldn't find SI_SETUP_WHOLE_WAVE_FUNC");
|
||||||
|
Register OriginalExec = Setup->getOperand(0).getReg();
|
||||||
MF->getRegInfo().clearKillFlags(OriginalExec);
|
MF->getRegInfo().clearKillFlags(OriginalExec);
|
||||||
MI.getOperand(0).setReg(OriginalExec);
|
MI.getOperand(0).setReg(OriginalExec);
|
||||||
return BB;
|
return BB;
|
||||||
|
@ -2473,7 +2473,6 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
|
|||||||
MI.setDesc(get(ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64));
|
MI.setDesc(get(ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN:
|
|
||||||
case AMDGPU::SI_RETURN: {
|
case AMDGPU::SI_RETURN: {
|
||||||
const MachineFunction *MF = MBB.getParent();
|
const MachineFunction *MF = MBB.getParent();
|
||||||
const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
|
const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
|
||||||
|
@ -670,6 +670,33 @@ def SI_WHOLE_WAVE_FUNC_RETURN : SPseudoInstSI <
|
|||||||
def : GCNPat<
|
def : GCNPat<
|
||||||
(AMDGPUwhole_wave_return), (SI_WHOLE_WAVE_FUNC_RETURN (i1 (IMPLICIT_DEF)))>;
|
(AMDGPUwhole_wave_return), (SI_WHOLE_WAVE_FUNC_RETURN (i1 (IMPLICIT_DEF)))>;
|
||||||
|
|
||||||
|
// Restores the previous EXEC and otherwise behaves entirely like a SI_TCRETURN.
|
||||||
|
// This is used for tail calls *from* a whole wave function. Tail calls to
|
||||||
|
// a whole wave function may use the usual opcodes, depending on the calling
|
||||||
|
// convention of the caller.
|
||||||
|
def SI_TCRETURN_GFX_WholeWave : SPseudoInstSI <
|
||||||
|
(outs),
|
||||||
|
(ins SReg_1:$orig_exec, Gfx_CCR_SGPR_64:$src0, unknown:$callee, i32imm:$fpdiff)> {
|
||||||
|
let isCall = 1;
|
||||||
|
let isTerminator = 1;
|
||||||
|
let isReturn = 1;
|
||||||
|
let isBarrier = 1;
|
||||||
|
let UseNamedOperandTable = 1;
|
||||||
|
let SchedRW = [WriteBranch];
|
||||||
|
let isConvergent = 1;
|
||||||
|
|
||||||
|
// We're going to use custom handling to set the $orig_exec to the correct value.
|
||||||
|
let usesCustomInserter = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Generate a SI_TCRETURN_GFX_WholeWave pseudo with a placeholder for its
|
||||||
|
// argument. It will be filled in by the custom inserter.
|
||||||
|
def : GCNPat<
|
||||||
|
(AMDGPUtc_return_gfx_ww i64:$src0, tglobaladdr:$callee, i32:$fpdiff),
|
||||||
|
(SI_TCRETURN_GFX_WholeWave (i1 (IMPLICIT_DEF)), Gfx_CCR_SGPR_64:$src0,
|
||||||
|
tglobaladdr:$callee, i32:$fpdiff)>;
|
||||||
|
|
||||||
|
|
||||||
// Return for returning shaders to a shader variant epilog.
|
// Return for returning shaders to a shader variant epilog.
|
||||||
def SI_RETURN_TO_EPILOG : SPseudoInstSI <
|
def SI_RETURN_TO_EPILOG : SPseudoInstSI <
|
||||||
(outs), (ins variable_ops), [(AMDGPUreturn_to_epilog)]> {
|
(outs), (ins variable_ops), [(AMDGPUreturn_to_epilog)]> {
|
||||||
|
@ -1513,6 +1513,7 @@ constexpr bool mayTailCallThisCC(CallingConv::ID CC) {
|
|||||||
switch (CC) {
|
switch (CC) {
|
||||||
case CallingConv::C:
|
case CallingConv::C:
|
||||||
case CallingConv::AMDGPU_Gfx:
|
case CallingConv::AMDGPU_Gfx:
|
||||||
|
case CallingConv::AMDGPU_Gfx_WholeWave:
|
||||||
return true;
|
return true;
|
||||||
default:
|
default:
|
||||||
return canGuaranteeTCO(CC);
|
return canGuaranteeTCO(CC);
|
||||||
|
@ -96,6 +96,672 @@ define amdgpu_gfx void @basic_test(i32 %x, i32 inreg %c, ptr addrspace(1) %ptr)
|
|||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
define amdgpu_gfx i32 @tail_call_from_gfx(i32 %x, i32 inreg %c) {
|
||||||
|
; DAGISEL-LABEL: tail_call_from_gfx:
|
||||||
|
; DAGISEL: ; %bb.0:
|
||||||
|
; DAGISEL-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||||
|
; DAGISEL-NEXT: s_wait_expcnt 0x0
|
||||||
|
; DAGISEL-NEXT: s_wait_samplecnt 0x0
|
||||||
|
; DAGISEL-NEXT: s_wait_bvhcnt 0x0
|
||||||
|
; DAGISEL-NEXT: s_wait_kmcnt 0x0
|
||||||
|
; DAGISEL-NEXT: v_add_nc_u32_e32 v1, 13, v0
|
||||||
|
; DAGISEL-NEXT: s_mov_b32 s1, good_callee@abs32@hi
|
||||||
|
; DAGISEL-NEXT: s_mov_b32 s0, good_callee@abs32@lo
|
||||||
|
; DAGISEL-NEXT: s_wait_alu 0xfffe
|
||||||
|
; DAGISEL-NEXT: s_setpc_b64 s[0:1]
|
||||||
|
;
|
||||||
|
; GISEL-LABEL: tail_call_from_gfx:
|
||||||
|
; GISEL: ; %bb.0:
|
||||||
|
; GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||||
|
; GISEL-NEXT: s_wait_expcnt 0x0
|
||||||
|
; GISEL-NEXT: s_wait_samplecnt 0x0
|
||||||
|
; GISEL-NEXT: s_wait_bvhcnt 0x0
|
||||||
|
; GISEL-NEXT: s_wait_kmcnt 0x0
|
||||||
|
; GISEL-NEXT: v_add_nc_u32_e32 v1, 13, v0
|
||||||
|
; GISEL-NEXT: s_mov_b32 s36, good_callee@abs32@lo
|
||||||
|
; GISEL-NEXT: s_mov_b32 s37, good_callee@abs32@hi
|
||||||
|
; GISEL-NEXT: s_wait_alu 0xfffe
|
||||||
|
; GISEL-NEXT: s_setpc_b64 s[36:37]
|
||||||
|
%y = add i32 %x, 13
|
||||||
|
%ret = tail call i32(ptr, ...) @llvm.amdgcn.call.whole.wave(ptr @good_callee, i32 %x, i32 %y, i32 inreg %c)
|
||||||
|
ret i32 %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
define amdgpu_gfx_whole_wave i32 @tail_call_from_whole_wave(i1 %active, i32 %x, i32 inreg %c) {
|
||||||
|
; DAGISEL-LABEL: tail_call_from_whole_wave:
|
||||||
|
; DAGISEL: ; %bb.0:
|
||||||
|
; DAGISEL-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||||
|
; DAGISEL-NEXT: s_wait_expcnt 0x0
|
||||||
|
; DAGISEL-NEXT: s_wait_samplecnt 0x0
|
||||||
|
; DAGISEL-NEXT: s_wait_bvhcnt 0x0
|
||||||
|
; DAGISEL-NEXT: s_wait_kmcnt 0x0
|
||||||
|
; DAGISEL-NEXT: s_xor_saveexec_b32 s0, -1
|
||||||
|
; DAGISEL-NEXT: s_clause 0x1f
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v0, s32
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v2, s32 offset:8
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v3, s32 offset:12
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v4, s32 offset:16
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v5, s32 offset:20
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v6, s32 offset:24
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v7, s32 offset:28
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v8, s32 offset:32
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v9, s32 offset:36
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v10, s32 offset:40
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v11, s32 offset:44
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v12, s32 offset:48
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v13, s32 offset:52
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v14, s32 offset:56
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v15, s32 offset:60
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v16, s32 offset:64
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v17, s32 offset:68
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v18, s32 offset:72
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v19, s32 offset:76
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v20, s32 offset:80
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v21, s32 offset:84
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v22, s32 offset:88
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v23, s32 offset:92
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v24, s32 offset:96
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v25, s32 offset:100
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v26, s32 offset:104
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v27, s32 offset:108
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v28, s32 offset:112
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v29, s32 offset:116
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v30, s32 offset:120
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v31, s32 offset:124
|
||||||
|
; DAGISEL-NEXT: s_clause 0x1f
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v32, s32 offset:128
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v33, s32 offset:132
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v34, s32 offset:136
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v35, s32 offset:140
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v36, s32 offset:144
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v37, s32 offset:148
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v38, s32 offset:152
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v39, s32 offset:156
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v48, s32 offset:160
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v49, s32 offset:164
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v50, s32 offset:168
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v51, s32 offset:172
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v52, s32 offset:176
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v53, s32 offset:180
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v54, s32 offset:184
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v55, s32 offset:188
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v64, s32 offset:192
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v65, s32 offset:196
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v66, s32 offset:200
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v67, s32 offset:204
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v68, s32 offset:208
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v69, s32 offset:212
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v70, s32 offset:216
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v71, s32 offset:220
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v80, s32 offset:224
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v81, s32 offset:228
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v82, s32 offset:232
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v83, s32 offset:236
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v84, s32 offset:240
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v85, s32 offset:244
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v86, s32 offset:248
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v87, s32 offset:252
|
||||||
|
; DAGISEL-NEXT: s_clause 0x1f
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v96, s32 offset:256
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v97, s32 offset:260
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v98, s32 offset:264
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v99, s32 offset:268
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v100, s32 offset:272
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v101, s32 offset:276
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v102, s32 offset:280
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v103, s32 offset:284
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v112, s32 offset:288
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v113, s32 offset:292
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v114, s32 offset:296
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v115, s32 offset:300
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v116, s32 offset:304
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v117, s32 offset:308
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v118, s32 offset:312
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v119, s32 offset:316
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v128, s32 offset:320
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v129, s32 offset:324
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v130, s32 offset:328
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v131, s32 offset:332
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v132, s32 offset:336
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v133, s32 offset:340
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v134, s32 offset:344
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v135, s32 offset:348
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v144, s32 offset:352
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v145, s32 offset:356
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v146, s32 offset:360
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v147, s32 offset:364
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v148, s32 offset:368
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v149, s32 offset:372
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v150, s32 offset:376
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v151, s32 offset:380
|
||||||
|
; DAGISEL-NEXT: s_clause 0x1f
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v160, s32 offset:384
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v161, s32 offset:388
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v162, s32 offset:392
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v163, s32 offset:396
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v164, s32 offset:400
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v165, s32 offset:404
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v166, s32 offset:408
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v167, s32 offset:412
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v176, s32 offset:416
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v177, s32 offset:420
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v178, s32 offset:424
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v179, s32 offset:428
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v180, s32 offset:432
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v181, s32 offset:436
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v182, s32 offset:440
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v183, s32 offset:444
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v192, s32 offset:448
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v193, s32 offset:452
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v194, s32 offset:456
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v195, s32 offset:460
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v196, s32 offset:464
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v197, s32 offset:468
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v198, s32 offset:472
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v199, s32 offset:476
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v208, s32 offset:480
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v209, s32 offset:484
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v210, s32 offset:488
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v211, s32 offset:492
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v212, s32 offset:496
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v213, s32 offset:500
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v214, s32 offset:504
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v215, s32 offset:508
|
||||||
|
; DAGISEL-NEXT: s_clause 0xf
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v224, s32 offset:512
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v225, s32 offset:516
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v226, s32 offset:520
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v227, s32 offset:524
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v228, s32 offset:528
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v229, s32 offset:532
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v230, s32 offset:536
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v231, s32 offset:540
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v240, s32 offset:544
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v241, s32 offset:548
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v242, s32 offset:552
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v243, s32 offset:556
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v244, s32 offset:560
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v245, s32 offset:564
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v246, s32 offset:568
|
||||||
|
; DAGISEL-NEXT: scratch_store_b32 off, v247, s32 offset:572
|
||||||
|
; DAGISEL-NEXT: s_mov_b32 exec_lo, -1
|
||||||
|
; DAGISEL-NEXT: v_add_nc_u32_e32 v1, 13, v0
|
||||||
|
; DAGISEL-NEXT: s_mov_b32 s37, good_callee@abs32@hi
|
||||||
|
; DAGISEL-NEXT: s_mov_b32 s36, good_callee@abs32@lo
|
||||||
|
; DAGISEL-NEXT: s_wait_alu 0xfffe
|
||||||
|
; DAGISEL-NEXT: s_xor_b32 exec_lo, s0, -1
|
||||||
|
; DAGISEL-NEXT: s_clause 0x1f
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v0, off, s32
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v1, off, s32 offset:4
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v2, off, s32 offset:8
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v3, off, s32 offset:12
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v4, off, s32 offset:16
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v5, off, s32 offset:20
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v6, off, s32 offset:24
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v7, off, s32 offset:28
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v8, off, s32 offset:32
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v9, off, s32 offset:36
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v10, off, s32 offset:40
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v11, off, s32 offset:44
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v12, off, s32 offset:48
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v13, off, s32 offset:52
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v14, off, s32 offset:56
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v15, off, s32 offset:60
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v16, off, s32 offset:64
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v17, off, s32 offset:68
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v18, off, s32 offset:72
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v19, off, s32 offset:76
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v20, off, s32 offset:80
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v21, off, s32 offset:84
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v22, off, s32 offset:88
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v23, off, s32 offset:92
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v24, off, s32 offset:96
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v25, off, s32 offset:100
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v26, off, s32 offset:104
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v27, off, s32 offset:108
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v28, off, s32 offset:112
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v29, off, s32 offset:116
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v30, off, s32 offset:120
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v31, off, s32 offset:124
|
||||||
|
; DAGISEL-NEXT: s_clause 0x1f
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v32, off, s32 offset:128
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v33, off, s32 offset:132
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v34, off, s32 offset:136
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v35, off, s32 offset:140
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v36, off, s32 offset:144
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v37, off, s32 offset:148
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v38, off, s32 offset:152
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v39, off, s32 offset:156
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v48, off, s32 offset:160
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v49, off, s32 offset:164
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v50, off, s32 offset:168
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v51, off, s32 offset:172
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v52, off, s32 offset:176
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v53, off, s32 offset:180
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v54, off, s32 offset:184
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v55, off, s32 offset:188
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v64, off, s32 offset:192
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v65, off, s32 offset:196
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v66, off, s32 offset:200
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v67, off, s32 offset:204
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v68, off, s32 offset:208
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v69, off, s32 offset:212
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v70, off, s32 offset:216
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v71, off, s32 offset:220
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v80, off, s32 offset:224
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v81, off, s32 offset:228
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v82, off, s32 offset:232
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v83, off, s32 offset:236
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v84, off, s32 offset:240
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v85, off, s32 offset:244
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v86, off, s32 offset:248
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v87, off, s32 offset:252
|
||||||
|
; DAGISEL-NEXT: s_clause 0x1f
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v96, off, s32 offset:256
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v97, off, s32 offset:260
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v98, off, s32 offset:264
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v99, off, s32 offset:268
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v100, off, s32 offset:272
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v101, off, s32 offset:276
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v102, off, s32 offset:280
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v103, off, s32 offset:284
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v112, off, s32 offset:288
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v113, off, s32 offset:292
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v114, off, s32 offset:296
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v115, off, s32 offset:300
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v116, off, s32 offset:304
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v117, off, s32 offset:308
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v118, off, s32 offset:312
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v119, off, s32 offset:316
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v128, off, s32 offset:320
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v129, off, s32 offset:324
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v130, off, s32 offset:328
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v131, off, s32 offset:332
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v132, off, s32 offset:336
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v133, off, s32 offset:340
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v134, off, s32 offset:344
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v135, off, s32 offset:348
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v144, off, s32 offset:352
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v145, off, s32 offset:356
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v146, off, s32 offset:360
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v147, off, s32 offset:364
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v148, off, s32 offset:368
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v149, off, s32 offset:372
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v150, off, s32 offset:376
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v151, off, s32 offset:380
|
||||||
|
; DAGISEL-NEXT: s_clause 0x1f
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v160, off, s32 offset:384
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v161, off, s32 offset:388
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v162, off, s32 offset:392
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v163, off, s32 offset:396
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v164, off, s32 offset:400
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v165, off, s32 offset:404
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v166, off, s32 offset:408
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v167, off, s32 offset:412
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v176, off, s32 offset:416
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v177, off, s32 offset:420
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v178, off, s32 offset:424
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v179, off, s32 offset:428
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v180, off, s32 offset:432
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v181, off, s32 offset:436
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v182, off, s32 offset:440
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v183, off, s32 offset:444
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v192, off, s32 offset:448
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v193, off, s32 offset:452
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v194, off, s32 offset:456
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v195, off, s32 offset:460
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v196, off, s32 offset:464
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v197, off, s32 offset:468
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v198, off, s32 offset:472
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v199, off, s32 offset:476
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v208, off, s32 offset:480
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v209, off, s32 offset:484
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v210, off, s32 offset:488
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v211, off, s32 offset:492
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v212, off, s32 offset:496
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v213, off, s32 offset:500
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v214, off, s32 offset:504
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v215, off, s32 offset:508
|
||||||
|
; DAGISEL-NEXT: s_clause 0xf
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v224, off, s32 offset:512
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v225, off, s32 offset:516
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v226, off, s32 offset:520
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v227, off, s32 offset:524
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v228, off, s32 offset:528
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v229, off, s32 offset:532
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v230, off, s32 offset:536
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v231, off, s32 offset:540
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v240, off, s32 offset:544
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v241, off, s32 offset:548
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v242, off, s32 offset:552
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v243, off, s32 offset:556
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v244, off, s32 offset:560
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v245, off, s32 offset:564
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v246, off, s32 offset:568
|
||||||
|
; DAGISEL-NEXT: scratch_load_b32 v247, off, s32 offset:572
|
||||||
|
; DAGISEL-NEXT: s_mov_b32 exec_lo, s0
|
||||||
|
; DAGISEL-NEXT: s_setpc_b64 s[36:37]
|
||||||
|
;
|
||||||
|
; GISEL-LABEL: tail_call_from_whole_wave:
|
||||||
|
; GISEL: ; %bb.0:
|
||||||
|
; GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||||
|
; GISEL-NEXT: s_wait_expcnt 0x0
|
||||||
|
; GISEL-NEXT: s_wait_samplecnt 0x0
|
||||||
|
; GISEL-NEXT: s_wait_bvhcnt 0x0
|
||||||
|
; GISEL-NEXT: s_wait_kmcnt 0x0
|
||||||
|
; GISEL-NEXT: s_xor_saveexec_b32 s0, -1
|
||||||
|
; GISEL-NEXT: s_clause 0x1f
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v0, s32
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v2, s32 offset:8
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v3, s32 offset:12
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v4, s32 offset:16
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v5, s32 offset:20
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v6, s32 offset:24
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v7, s32 offset:28
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v8, s32 offset:32
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v9, s32 offset:36
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v10, s32 offset:40
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v11, s32 offset:44
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v12, s32 offset:48
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v13, s32 offset:52
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v14, s32 offset:56
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v15, s32 offset:60
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v16, s32 offset:64
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v17, s32 offset:68
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v18, s32 offset:72
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v19, s32 offset:76
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v20, s32 offset:80
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v21, s32 offset:84
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v22, s32 offset:88
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v23, s32 offset:92
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v24, s32 offset:96
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v25, s32 offset:100
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v26, s32 offset:104
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v27, s32 offset:108
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v28, s32 offset:112
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v29, s32 offset:116
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v30, s32 offset:120
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v31, s32 offset:124
|
||||||
|
; GISEL-NEXT: s_clause 0x1f
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v32, s32 offset:128
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v33, s32 offset:132
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v34, s32 offset:136
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v35, s32 offset:140
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v36, s32 offset:144
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v37, s32 offset:148
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v38, s32 offset:152
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v39, s32 offset:156
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v48, s32 offset:160
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v49, s32 offset:164
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v50, s32 offset:168
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v51, s32 offset:172
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v52, s32 offset:176
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v53, s32 offset:180
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v54, s32 offset:184
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v55, s32 offset:188
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v64, s32 offset:192
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v65, s32 offset:196
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v66, s32 offset:200
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v67, s32 offset:204
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v68, s32 offset:208
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v69, s32 offset:212
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v70, s32 offset:216
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v71, s32 offset:220
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v80, s32 offset:224
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v81, s32 offset:228
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v82, s32 offset:232
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v83, s32 offset:236
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v84, s32 offset:240
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v85, s32 offset:244
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v86, s32 offset:248
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v87, s32 offset:252
|
||||||
|
; GISEL-NEXT: s_clause 0x1f
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v96, s32 offset:256
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v97, s32 offset:260
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v98, s32 offset:264
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v99, s32 offset:268
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v100, s32 offset:272
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v101, s32 offset:276
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v102, s32 offset:280
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v103, s32 offset:284
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v112, s32 offset:288
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v113, s32 offset:292
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v114, s32 offset:296
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v115, s32 offset:300
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v116, s32 offset:304
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v117, s32 offset:308
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v118, s32 offset:312
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v119, s32 offset:316
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v128, s32 offset:320
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v129, s32 offset:324
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v130, s32 offset:328
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v131, s32 offset:332
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v132, s32 offset:336
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v133, s32 offset:340
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v134, s32 offset:344
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v135, s32 offset:348
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v144, s32 offset:352
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v145, s32 offset:356
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v146, s32 offset:360
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v147, s32 offset:364
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v148, s32 offset:368
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v149, s32 offset:372
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v150, s32 offset:376
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v151, s32 offset:380
|
||||||
|
; GISEL-NEXT: s_clause 0x1f
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v160, s32 offset:384
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v161, s32 offset:388
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v162, s32 offset:392
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v163, s32 offset:396
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v164, s32 offset:400
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v165, s32 offset:404
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v166, s32 offset:408
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v167, s32 offset:412
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v176, s32 offset:416
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v177, s32 offset:420
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v178, s32 offset:424
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v179, s32 offset:428
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v180, s32 offset:432
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v181, s32 offset:436
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v182, s32 offset:440
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v183, s32 offset:444
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v192, s32 offset:448
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v193, s32 offset:452
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v194, s32 offset:456
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v195, s32 offset:460
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v196, s32 offset:464
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v197, s32 offset:468
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v198, s32 offset:472
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v199, s32 offset:476
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v208, s32 offset:480
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v209, s32 offset:484
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v210, s32 offset:488
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v211, s32 offset:492
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v212, s32 offset:496
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v213, s32 offset:500
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v214, s32 offset:504
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v215, s32 offset:508
|
||||||
|
; GISEL-NEXT: s_clause 0xf
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v224, s32 offset:512
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v225, s32 offset:516
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v226, s32 offset:520
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v227, s32 offset:524
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v228, s32 offset:528
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v229, s32 offset:532
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v230, s32 offset:536
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v231, s32 offset:540
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v240, s32 offset:544
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v241, s32 offset:548
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v242, s32 offset:552
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v243, s32 offset:556
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v244, s32 offset:560
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v245, s32 offset:564
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v246, s32 offset:568
|
||||||
|
; GISEL-NEXT: scratch_store_b32 off, v247, s32 offset:572
|
||||||
|
; GISEL-NEXT: s_mov_b32 exec_lo, -1
|
||||||
|
; GISEL-NEXT: v_add_nc_u32_e32 v1, 13, v0
|
||||||
|
; GISEL-NEXT: s_mov_b32 s36, good_callee@abs32@lo
|
||||||
|
; GISEL-NEXT: s_mov_b32 s37, good_callee@abs32@hi
|
||||||
|
; GISEL-NEXT: s_wait_alu 0xfffe
|
||||||
|
; GISEL-NEXT: s_xor_b32 exec_lo, s0, -1
|
||||||
|
; GISEL-NEXT: s_clause 0x1f
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v0, off, s32
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v1, off, s32 offset:4
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v2, off, s32 offset:8
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v3, off, s32 offset:12
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v4, off, s32 offset:16
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v5, off, s32 offset:20
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v6, off, s32 offset:24
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v7, off, s32 offset:28
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v8, off, s32 offset:32
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v9, off, s32 offset:36
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v10, off, s32 offset:40
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v11, off, s32 offset:44
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v12, off, s32 offset:48
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v13, off, s32 offset:52
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v14, off, s32 offset:56
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v15, off, s32 offset:60
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v16, off, s32 offset:64
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v17, off, s32 offset:68
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v18, off, s32 offset:72
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v19, off, s32 offset:76
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v20, off, s32 offset:80
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v21, off, s32 offset:84
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v22, off, s32 offset:88
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v23, off, s32 offset:92
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v24, off, s32 offset:96
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v25, off, s32 offset:100
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v26, off, s32 offset:104
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v27, off, s32 offset:108
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v28, off, s32 offset:112
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v29, off, s32 offset:116
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v30, off, s32 offset:120
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v31, off, s32 offset:124
|
||||||
|
; GISEL-NEXT: s_clause 0x1f
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v32, off, s32 offset:128
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v33, off, s32 offset:132
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v34, off, s32 offset:136
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v35, off, s32 offset:140
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v36, off, s32 offset:144
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v37, off, s32 offset:148
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v38, off, s32 offset:152
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v39, off, s32 offset:156
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v48, off, s32 offset:160
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v49, off, s32 offset:164
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v50, off, s32 offset:168
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v51, off, s32 offset:172
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v52, off, s32 offset:176
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v53, off, s32 offset:180
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v54, off, s32 offset:184
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v55, off, s32 offset:188
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v64, off, s32 offset:192
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v65, off, s32 offset:196
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v66, off, s32 offset:200
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v67, off, s32 offset:204
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v68, off, s32 offset:208
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v69, off, s32 offset:212
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v70, off, s32 offset:216
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v71, off, s32 offset:220
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v80, off, s32 offset:224
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v81, off, s32 offset:228
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v82, off, s32 offset:232
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v83, off, s32 offset:236
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v84, off, s32 offset:240
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v85, off, s32 offset:244
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v86, off, s32 offset:248
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v87, off, s32 offset:252
|
||||||
|
; GISEL-NEXT: s_clause 0x1f
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v96, off, s32 offset:256
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v97, off, s32 offset:260
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v98, off, s32 offset:264
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v99, off, s32 offset:268
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v100, off, s32 offset:272
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v101, off, s32 offset:276
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v102, off, s32 offset:280
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v103, off, s32 offset:284
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v112, off, s32 offset:288
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v113, off, s32 offset:292
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v114, off, s32 offset:296
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v115, off, s32 offset:300
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v116, off, s32 offset:304
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v117, off, s32 offset:308
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v118, off, s32 offset:312
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v119, off, s32 offset:316
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v128, off, s32 offset:320
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v129, off, s32 offset:324
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v130, off, s32 offset:328
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v131, off, s32 offset:332
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v132, off, s32 offset:336
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v133, off, s32 offset:340
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v134, off, s32 offset:344
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v135, off, s32 offset:348
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v144, off, s32 offset:352
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v145, off, s32 offset:356
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v146, off, s32 offset:360
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v147, off, s32 offset:364
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v148, off, s32 offset:368
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v149, off, s32 offset:372
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v150, off, s32 offset:376
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v151, off, s32 offset:380
|
||||||
|
; GISEL-NEXT: s_clause 0x1f
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v160, off, s32 offset:384
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v161, off, s32 offset:388
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v162, off, s32 offset:392
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v163, off, s32 offset:396
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v164, off, s32 offset:400
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v165, off, s32 offset:404
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v166, off, s32 offset:408
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v167, off, s32 offset:412
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v176, off, s32 offset:416
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v177, off, s32 offset:420
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v178, off, s32 offset:424
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v179, off, s32 offset:428
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v180, off, s32 offset:432
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v181, off, s32 offset:436
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v182, off, s32 offset:440
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v183, off, s32 offset:444
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v192, off, s32 offset:448
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v193, off, s32 offset:452
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v194, off, s32 offset:456
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v195, off, s32 offset:460
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v196, off, s32 offset:464
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v197, off, s32 offset:468
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v198, off, s32 offset:472
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v199, off, s32 offset:476
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v208, off, s32 offset:480
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v209, off, s32 offset:484
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v210, off, s32 offset:488
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v211, off, s32 offset:492
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v212, off, s32 offset:496
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v213, off, s32 offset:500
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v214, off, s32 offset:504
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v215, off, s32 offset:508
|
||||||
|
; GISEL-NEXT: s_clause 0xf
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v224, off, s32 offset:512
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v225, off, s32 offset:516
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v226, off, s32 offset:520
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v227, off, s32 offset:524
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v228, off, s32 offset:528
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v229, off, s32 offset:532
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v230, off, s32 offset:536
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v231, off, s32 offset:540
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v240, off, s32 offset:544
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v241, off, s32 offset:548
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v242, off, s32 offset:552
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v243, off, s32 offset:556
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v244, off, s32 offset:560
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v245, off, s32 offset:564
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v246, off, s32 offset:568
|
||||||
|
; GISEL-NEXT: scratch_load_b32 v247, off, s32 offset:572
|
||||||
|
; GISEL-NEXT: s_mov_b32 exec_lo, s0
|
||||||
|
; GISEL-NEXT: s_setpc_b64 s[36:37]
|
||||||
|
%y = add i32 %x, 13
|
||||||
|
%ret = tail call i32(ptr, ...) @llvm.amdgcn.call.whole.wave(ptr @good_callee, i32 %x, i32 %y, i32 inreg %c)
|
||||||
|
ret i32 %ret
|
||||||
|
}
|
||||||
|
|
||||||
declare amdgpu_gfx_whole_wave void @void_callee(i1 %active, i32 %x)
|
declare amdgpu_gfx_whole_wave void @void_callee(i1 %active, i32 %x)
|
||||||
|
|
||||||
define amdgpu_gfx void @ret_void(i32 %x) {
|
define amdgpu_gfx void @ret_void(i32 %x) {
|
||||||
|
@ -33,7 +33,7 @@ body: |
|
|||||||
; CHECK-NEXT: $exec_lo = S_XOR_B32 $sgpr0, -1, implicit-def $scc
|
; CHECK-NEXT: $exec_lo = S_XOR_B32 $sgpr0, -1, implicit-def $scc
|
||||||
; CHECK-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0(tied-def 0) :: (load (s32) from %stack.0, addrspace 5)
|
; CHECK-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0(tied-def 0) :: (load (s32) from %stack.0, addrspace 5)
|
||||||
; CHECK-NEXT: $exec_lo = S_MOV_B32 $sgpr0
|
; CHECK-NEXT: $exec_lo = S_MOV_B32 $sgpr0
|
||||||
; CHECK-NEXT: SI_WHOLE_WAVE_FUNC_RETURN killed renamable $sgpr0, implicit killed $vgpr0
|
; CHECK-NEXT: SI_RETURN implicit killed $vgpr0
|
||||||
renamable $sgpr0 = SI_WHOLE_WAVE_FUNC_SETUP implicit-def dead $exec, implicit $exec
|
renamable $sgpr0 = SI_WHOLE_WAVE_FUNC_SETUP implicit-def dead $exec, implicit $exec
|
||||||
$vgpr0 = V_MOV_B32_e32 14, implicit $exec
|
$vgpr0 = V_MOV_B32_e32 14, implicit $exec
|
||||||
SI_WHOLE_WAVE_FUNC_RETURN killed renamable $sgpr0, implicit killed $vgpr0
|
SI_WHOLE_WAVE_FUNC_RETURN killed renamable $sgpr0, implicit killed $vgpr0
|
||||||
@ -69,7 +69,7 @@ body: |
|
|||||||
; CHECK-NEXT: $vgpr40 = V_MOV_B32_e32 14, implicit $exec
|
; CHECK-NEXT: $vgpr40 = V_MOV_B32_e32 14, implicit $exec
|
||||||
; CHECK-NEXT: $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
|
; CHECK-NEXT: $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
|
||||||
; CHECK-NEXT: $exec_lo = S_MOV_B32 $sgpr0
|
; CHECK-NEXT: $exec_lo = S_MOV_B32 $sgpr0
|
||||||
; CHECK-NEXT: SI_WHOLE_WAVE_FUNC_RETURN killed renamable $sgpr0
|
; CHECK-NEXT: SI_RETURN
|
||||||
renamable $sgpr0 = SI_WHOLE_WAVE_FUNC_SETUP implicit-def dead $exec, implicit $exec
|
renamable $sgpr0 = SI_WHOLE_WAVE_FUNC_SETUP implicit-def dead $exec, implicit $exec
|
||||||
$vgpr40 = V_MOV_B32_e32 14, implicit $exec
|
$vgpr40 = V_MOV_B32_e32 14, implicit $exec
|
||||||
SI_WHOLE_WAVE_FUNC_RETURN killed renamable $sgpr0
|
SI_WHOLE_WAVE_FUNC_RETURN killed renamable $sgpr0
|
||||||
@ -110,7 +110,7 @@ body: |
|
|||||||
; CHECK-NEXT: $exec_lo = S_XOR_B32 $vcc_lo, -1, implicit-def $scc
|
; CHECK-NEXT: $exec_lo = S_XOR_B32 $vcc_lo, -1, implicit-def $scc
|
||||||
; CHECK-NEXT: $vgpr192 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
|
; CHECK-NEXT: $vgpr192 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
|
||||||
; CHECK-NEXT: $exec_lo = S_MOV_B32 $vcc_lo
|
; CHECK-NEXT: $exec_lo = S_MOV_B32 $vcc_lo
|
||||||
; CHECK-NEXT: SI_WHOLE_WAVE_FUNC_RETURN killed renamable $vcc_lo
|
; CHECK-NEXT: SI_RETURN
|
||||||
$vgpr192 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr192
|
$vgpr192 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr192
|
||||||
renamable $vcc_lo = SI_WHOLE_WAVE_FUNC_SETUP implicit-def dead $exec, implicit $exec
|
renamable $vcc_lo = SI_WHOLE_WAVE_FUNC_SETUP implicit-def dead $exec, implicit $exec
|
||||||
$sgpr20 = S_MOV_B32 14, implicit $exec
|
$sgpr20 = S_MOV_B32 14, implicit $exec
|
||||||
@ -151,7 +151,7 @@ body: |
|
|||||||
; CHECK-NEXT: $sgpr20 = SI_RESTORE_S32_FROM_VGPR $vgpr191, 0
|
; CHECK-NEXT: $sgpr20 = SI_RESTORE_S32_FROM_VGPR $vgpr191, 0
|
||||||
; CHECK-NEXT: $vgpr191 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
|
; CHECK-NEXT: $vgpr191 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
|
||||||
; CHECK-NEXT: $exec_lo = S_MOV_B32 $vcc_lo
|
; CHECK-NEXT: $exec_lo = S_MOV_B32 $vcc_lo
|
||||||
; CHECK-NEXT: SI_WHOLE_WAVE_FUNC_RETURN killed renamable $vcc_lo
|
; CHECK-NEXT: SI_RETURN
|
||||||
$vgpr191 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr191
|
$vgpr191 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr191
|
||||||
renamable $vcc_lo = SI_WHOLE_WAVE_FUNC_SETUP implicit-def dead $exec, implicit $exec
|
renamable $vcc_lo = SI_WHOLE_WAVE_FUNC_SETUP implicit-def dead $exec, implicit $exec
|
||||||
$sgpr20 = S_MOV_B32 14, implicit $exec
|
$sgpr20 = S_MOV_B32 14, implicit $exec
|
||||||
@ -207,7 +207,7 @@ body: |
|
|||||||
; CHECK-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
|
; CHECK-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
|
||||||
; CHECK-NEXT: $vgpr49 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5)
|
; CHECK-NEXT: $vgpr49 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5)
|
||||||
; CHECK-NEXT: $exec_lo = S_MOV_B32 $vcc_lo
|
; CHECK-NEXT: $exec_lo = S_MOV_B32 $vcc_lo
|
||||||
; CHECK-NEXT: SI_WHOLE_WAVE_FUNC_RETURN killed renamable $vcc_lo
|
; CHECK-NEXT: SI_RETURN
|
||||||
$vgpr191 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr191
|
$vgpr191 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr191
|
||||||
renamable $vcc_lo = SI_WHOLE_WAVE_FUNC_SETUP implicit-def dead $exec, implicit $exec
|
renamable $vcc_lo = SI_WHOLE_WAVE_FUNC_SETUP implicit-def dead $exec, implicit $exec
|
||||||
S_NOP 0, implicit-def $vgpr40, implicit-def $sgpr20
|
S_NOP 0, implicit-def $vgpr40, implicit-def $sgpr20
|
||||||
@ -265,7 +265,7 @@ body: |
|
|||||||
; CHECK-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
|
; CHECK-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
|
||||||
; CHECK-NEXT: $vgpr49 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5)
|
; CHECK-NEXT: $vgpr49 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5)
|
||||||
; CHECK-NEXT: $exec_lo = S_MOV_B32 $sgpr3
|
; CHECK-NEXT: $exec_lo = S_MOV_B32 $sgpr3
|
||||||
; CHECK-NEXT: SI_WHOLE_WAVE_FUNC_RETURN killed renamable $sgpr3
|
; CHECK-NEXT: SI_RETURN
|
||||||
$vgpr191 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr191
|
$vgpr191 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr191
|
||||||
renamable $vcc_lo = SI_WHOLE_WAVE_FUNC_SETUP implicit-def dead $exec, implicit $exec
|
renamable $vcc_lo = SI_WHOLE_WAVE_FUNC_SETUP implicit-def dead $exec, implicit $exec
|
||||||
S_NOP 0, implicit-def $vgpr40, implicit-def $sgpr20
|
S_NOP 0, implicit-def $vgpr40, implicit-def $sgpr20
|
||||||
@ -322,7 +322,7 @@ body: |
|
|||||||
; CHECK-NEXT: $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.3, addrspace 5)
|
; CHECK-NEXT: $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.3, addrspace 5)
|
||||||
; CHECK-NEXT: $vgpr5 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.4, addrspace 5)
|
; CHECK-NEXT: $vgpr5 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.4, addrspace 5)
|
||||||
; CHECK-NEXT: $exec_lo = S_MOV_B32 $sgpr0
|
; CHECK-NEXT: $exec_lo = S_MOV_B32 $sgpr0
|
||||||
; CHECK-NEXT: SI_WHOLE_WAVE_FUNC_RETURN killed renamable $sgpr0, implicit killed $vgpr0
|
; CHECK-NEXT: SI_RETURN implicit killed $vgpr0
|
||||||
renamable $sgpr0 = SI_WHOLE_WAVE_FUNC_SETUP implicit-def dead $exec, implicit $exec
|
renamable $sgpr0 = SI_WHOLE_WAVE_FUNC_SETUP implicit-def dead $exec, implicit $exec
|
||||||
$vgpr0 = V_MOV_B32_e32 14, implicit $exec
|
$vgpr0 = V_MOV_B32_e32 14, implicit $exec
|
||||||
S_NOP 0, implicit-def $vgpr2_vgpr3_vgpr4_vgpr5, implicit-def $vgpr40_vgpr41_vgpr42
|
S_NOP 0, implicit-def $vgpr2_vgpr3_vgpr4_vgpr5, implicit-def $vgpr40_vgpr41_vgpr42
|
||||||
@ -363,7 +363,7 @@ body: |
|
|||||||
; CHECK-NEXT: $sgpr0 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
|
; CHECK-NEXT: $sgpr0 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
|
||||||
; CHECK-NEXT: S_NOP 0, implicit $vgpr0, implicit $vgpr20, implicit $vgpr40
|
; CHECK-NEXT: S_NOP 0, implicit $vgpr0, implicit $vgpr20, implicit $vgpr40
|
||||||
; CHECK-NEXT: $exec_lo = S_MOV_B32 $sgpr0
|
; CHECK-NEXT: $exec_lo = S_MOV_B32 $sgpr0
|
||||||
; CHECK-NEXT: SI_WHOLE_WAVE_FUNC_RETURN killed renamable $sgpr0, implicit killed $vgpr0
|
; CHECK-NEXT: SI_RETURN implicit killed $vgpr0
|
||||||
renamable $sgpr0 = SI_WHOLE_WAVE_FUNC_SETUP implicit-def dead $exec, implicit $exec
|
renamable $sgpr0 = SI_WHOLE_WAVE_FUNC_SETUP implicit-def dead $exec, implicit $exec
|
||||||
S_NOP 0, implicit $vgpr0, implicit $vgpr20, implicit $vgpr40
|
S_NOP 0, implicit $vgpr0, implicit $vgpr20, implicit $vgpr40
|
||||||
SI_WHOLE_WAVE_FUNC_RETURN killed renamable $sgpr0, implicit killed $vgpr0
|
SI_WHOLE_WAVE_FUNC_RETURN killed renamable $sgpr0, implicit killed $vgpr0
|
||||||
@ -422,7 +422,7 @@ body: |
|
|||||||
; CHECK-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0(tied-def 0) :: (load (s32) from %stack.0, addrspace 5)
|
; CHECK-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0(tied-def 0) :: (load (s32) from %stack.0, addrspace 5)
|
||||||
; CHECK-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5)
|
; CHECK-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5)
|
||||||
; CHECK-NEXT: $exec_lo = S_MOV_B32 $vcc_lo
|
; CHECK-NEXT: $exec_lo = S_MOV_B32 $vcc_lo
|
||||||
; CHECK-NEXT: SI_WHOLE_WAVE_FUNC_RETURN killed renamable $vcc_lo, implicit $vgpr0
|
; CHECK-NEXT: SI_RETURN implicit $vgpr0
|
||||||
bb.0:
|
bb.0:
|
||||||
successors: %bb.1, %bb.2
|
successors: %bb.1, %bb.2
|
||||||
liveins: $vgpr0, $vgpr1
|
liveins: $vgpr0, $vgpr1
|
||||||
|
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user