Compare commits
1 Commits
main
...
users/arse
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f594b23678 |
@ -1248,6 +1248,7 @@ void SIFoldOperandsImpl::foldOperand(
|
||||
if (FoldingImmLike && UseMI->isCopy()) {
|
||||
Register DestReg = UseMI->getOperand(0).getReg();
|
||||
Register SrcReg = UseMI->getOperand(1).getReg();
|
||||
unsigned UseSubReg = UseMI->getOperand(1).getSubReg();
|
||||
assert(SrcReg.isVirtual());
|
||||
|
||||
const TargetRegisterClass *SrcRC = MRI->getRegClass(SrcReg);
|
||||
@ -1278,44 +1279,60 @@ void SIFoldOperandsImpl::foldOperand(
|
||||
DestRC = &AMDGPU::SGPR_32RegClass;
|
||||
}
|
||||
|
||||
// In order to fold immediates into copies, we need to change the
|
||||
// copy to a MOV.
|
||||
// In order to fold immediates into copies, we need to change the copy to a
|
||||
// MOV. Find a compatible mov instruction with the value.
|
||||
for (unsigned MovOp :
|
||||
{AMDGPU::S_MOV_B32, AMDGPU::V_MOV_B32_e32, AMDGPU::S_MOV_B64,
|
||||
AMDGPU::V_MOV_B64_PSEUDO, AMDGPU::V_MOV_B16_t16_e64}) {
|
||||
const MCInstrDesc &MovDesc = TII->get(MovOp);
|
||||
assert(MovDesc.getNumDefs() > 0 && MovDesc.operands()[0].RegClass != -1);
|
||||
|
||||
unsigned MovOp = TII->getMovOpcode(DestRC);
|
||||
if (MovOp == AMDGPU::COPY)
|
||||
const TargetRegisterClass *MovDstRC =
|
||||
TRI->getRegClass(MovDesc.operands()[0].RegClass);
|
||||
|
||||
// Fold if the destination register class of the MOV instruction (ResRC)
|
||||
// is a superclass of (or equal to) the destination register class of the
|
||||
// COPY (DestRC). If this condition fails, folding would be illegal.
|
||||
if (!DestRC->hasSuperClassEq(MovDstRC))
|
||||
continue;
|
||||
|
||||
const int SrcIdx = MovOp == AMDGPU::V_MOV_B16_t16_e64 ? 2 : 1;
|
||||
const TargetRegisterClass *MovSrcRC =
|
||||
TRI->getRegClass(MovDesc.operands()[SrcIdx].RegClass);
|
||||
|
||||
if (UseSubReg)
|
||||
MovSrcRC = TRI->getMatchingSuperRegClass(SrcRC, MovSrcRC, UseSubReg);
|
||||
if (!MRI->constrainRegClass(SrcReg, MovSrcRC))
|
||||
break;
|
||||
|
||||
MachineInstr::mop_iterator ImpOpI = UseMI->implicit_operands().begin();
|
||||
MachineInstr::mop_iterator ImpOpE = UseMI->implicit_operands().end();
|
||||
while (ImpOpI != ImpOpE) {
|
||||
MachineInstr::mop_iterator Tmp = ImpOpI;
|
||||
ImpOpI++;
|
||||
UseMI->removeOperand(UseMI->getOperandNo(Tmp));
|
||||
}
|
||||
UseMI->setDesc(MovDesc);
|
||||
|
||||
if (MovOp == AMDGPU::V_MOV_B16_t16_e64) {
|
||||
const auto &SrcOp = UseMI->getOperand(UseOpIdx);
|
||||
MachineOperand NewSrcOp(SrcOp);
|
||||
MachineFunction *MF = UseMI->getParent()->getParent();
|
||||
UseMI->removeOperand(1);
|
||||
UseMI->addOperand(*MF, MachineOperand::CreateImm(0)); // src0_modifiers
|
||||
UseMI->addOperand(NewSrcOp); // src0
|
||||
UseMI->addOperand(*MF, MachineOperand::CreateImm(0)); // op_sel
|
||||
UseOpIdx = SrcIdx;
|
||||
UseOp = &UseMI->getOperand(UseOpIdx);
|
||||
}
|
||||
CopiesToReplace.push_back(UseMI);
|
||||
break;
|
||||
}
|
||||
|
||||
// We failed to replace the copy, so give up.
|
||||
if (UseMI->getOpcode() == AMDGPU::COPY)
|
||||
return;
|
||||
|
||||
// Fold if the destination register class of the MOV instruction (ResRC)
|
||||
// is a superclass of (or equal to) the destination register class of the
|
||||
// COPY (DestRC). If this condition fails, folding would be illegal.
|
||||
const MCInstrDesc &MovDesc = TII->get(MovOp);
|
||||
assert(MovDesc.getNumDefs() > 0 && MovDesc.operands()[0].RegClass != -1);
|
||||
const TargetRegisterClass *ResRC =
|
||||
TRI->getRegClass(MovDesc.operands()[0].RegClass);
|
||||
if (!DestRC->hasSuperClassEq(ResRC))
|
||||
return;
|
||||
|
||||
MachineInstr::mop_iterator ImpOpI = UseMI->implicit_operands().begin();
|
||||
MachineInstr::mop_iterator ImpOpE = UseMI->implicit_operands().end();
|
||||
while (ImpOpI != ImpOpE) {
|
||||
MachineInstr::mop_iterator Tmp = ImpOpI;
|
||||
ImpOpI++;
|
||||
UseMI->removeOperand(UseMI->getOperandNo(Tmp));
|
||||
}
|
||||
UseMI->setDesc(TII->get(MovOp));
|
||||
|
||||
if (MovOp == AMDGPU::V_MOV_B16_t16_e64) {
|
||||
const auto &SrcOp = UseMI->getOperand(UseOpIdx);
|
||||
MachineOperand NewSrcOp(SrcOp);
|
||||
MachineFunction *MF = UseMI->getParent()->getParent();
|
||||
UseMI->removeOperand(1);
|
||||
UseMI->addOperand(*MF, MachineOperand::CreateImm(0)); // src0_modifiers
|
||||
UseMI->addOperand(NewSrcOp); // src0
|
||||
UseMI->addOperand(*MF, MachineOperand::CreateImm(0)); // op_sel
|
||||
UseOpIdx = 2;
|
||||
UseOp = &UseMI->getOperand(UseOpIdx);
|
||||
}
|
||||
CopiesToReplace.push_back(UseMI);
|
||||
} else {
|
||||
if (UseMI->isCopy() && OpToFold.isReg() &&
|
||||
UseMI->getOperand(0).getReg().isVirtual() &&
|
||||
|
||||
@ -197,7 +197,7 @@ define amdgpu_kernel void @br_cc_f16_imm_a(
|
||||
; GFX11-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0.5, v1.l
|
||||
; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB1_2
|
||||
; GFX11-TRUE16-NEXT: ; %bb.1: ; %one
|
||||
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, 0x3800
|
||||
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x3800
|
||||
; GFX11-TRUE16-NEXT: .LBB1_2: ; %two
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 s2, s6
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 s3, s7
|
||||
@ -303,7 +303,7 @@ define amdgpu_kernel void @br_cc_f16_imm_b(
|
||||
; GFX11-TRUE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, 0.5, v1.l
|
||||
; GFX11-TRUE16-NEXT: s_cbranch_vccz .LBB2_2
|
||||
; GFX11-TRUE16-NEXT: ; %bb.1: ; %two
|
||||
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, 0x3800
|
||||
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x3800
|
||||
; GFX11-TRUE16-NEXT: .LBB2_2: ; %one
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 s2, s6
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 s3, s7
|
||||
|
||||
@ -426,16 +426,27 @@ define amdgpu_kernel void @test_call_external_void_func_i8_imm(i32) #0 {
|
||||
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||||
; GFX9-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-LABEL: test_call_external_void_func_i8_imm:
|
||||
; GFX11: ; %bb.0:
|
||||
; GFX11-NEXT: v_mov_b32_e32 v0, 0x7b
|
||||
; GFX11-NEXT: s_getpc_b64 s[2:3]
|
||||
; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_i8@rel32@lo+4
|
||||
; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_i8@rel32@hi+12
|
||||
; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
|
||||
; GFX11-NEXT: s_mov_b32 s32, 0
|
||||
; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
|
||||
; GFX11-NEXT: s_endpgm
|
||||
; GFX11-TRUE16-LABEL: test_call_external_void_func_i8_imm:
|
||||
; GFX11-TRUE16: ; %bb.0:
|
||||
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x7b
|
||||
; GFX11-TRUE16-NEXT: s_getpc_b64 s[2:3]
|
||||
; GFX11-TRUE16-NEXT: s_add_u32 s2, s2, external_void_func_i8@rel32@lo+4
|
||||
; GFX11-TRUE16-NEXT: s_addc_u32 s3, s3, external_void_func_i8@rel32@hi+12
|
||||
; GFX11-TRUE16-NEXT: s_mov_b64 s[6:7], s[0:1]
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 s32, 0
|
||||
; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[2:3]
|
||||
; GFX11-TRUE16-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-FAKE16-LABEL: test_call_external_void_func_i8_imm:
|
||||
; GFX11-FAKE16: ; %bb.0:
|
||||
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0x7b
|
||||
; GFX11-FAKE16-NEXT: s_getpc_b64 s[2:3]
|
||||
; GFX11-FAKE16-NEXT: s_add_u32 s2, s2, external_void_func_i8@rel32@lo+4
|
||||
; GFX11-FAKE16-NEXT: s_addc_u32 s3, s3, external_void_func_i8@rel32@hi+12
|
||||
; GFX11-FAKE16-NEXT: s_mov_b64 s[6:7], s[0:1]
|
||||
; GFX11-FAKE16-NEXT: s_mov_b32 s32, 0
|
||||
; GFX11-FAKE16-NEXT: s_swappc_b64 s[30:31], s[2:3]
|
||||
; GFX11-FAKE16-NEXT: s_endpgm
|
||||
;
|
||||
; HSA-LABEL: test_call_external_void_func_i8_imm:
|
||||
; HSA: ; %bb.0:
|
||||
@ -723,16 +734,27 @@ define amdgpu_kernel void @test_call_external_void_func_i16_imm() #0 {
|
||||
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||||
; GFX9-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-LABEL: test_call_external_void_func_i16_imm:
|
||||
; GFX11: ; %bb.0:
|
||||
; GFX11-NEXT: v_mov_b32_e32 v0, 0x7b
|
||||
; GFX11-NEXT: s_getpc_b64 s[2:3]
|
||||
; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_i16@rel32@lo+4
|
||||
; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_i16@rel32@hi+12
|
||||
; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
|
||||
; GFX11-NEXT: s_mov_b32 s32, 0
|
||||
; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
|
||||
; GFX11-NEXT: s_endpgm
|
||||
; GFX11-TRUE16-LABEL: test_call_external_void_func_i16_imm:
|
||||
; GFX11-TRUE16: ; %bb.0:
|
||||
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x7b
|
||||
; GFX11-TRUE16-NEXT: s_getpc_b64 s[2:3]
|
||||
; GFX11-TRUE16-NEXT: s_add_u32 s2, s2, external_void_func_i16@rel32@lo+4
|
||||
; GFX11-TRUE16-NEXT: s_addc_u32 s3, s3, external_void_func_i16@rel32@hi+12
|
||||
; GFX11-TRUE16-NEXT: s_mov_b64 s[6:7], s[0:1]
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 s32, 0
|
||||
; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[2:3]
|
||||
; GFX11-TRUE16-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-FAKE16-LABEL: test_call_external_void_func_i16_imm:
|
||||
; GFX11-FAKE16: ; %bb.0:
|
||||
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0x7b
|
||||
; GFX11-FAKE16-NEXT: s_getpc_b64 s[2:3]
|
||||
; GFX11-FAKE16-NEXT: s_add_u32 s2, s2, external_void_func_i16@rel32@lo+4
|
||||
; GFX11-FAKE16-NEXT: s_addc_u32 s3, s3, external_void_func_i16@rel32@hi+12
|
||||
; GFX11-FAKE16-NEXT: s_mov_b64 s[6:7], s[0:1]
|
||||
; GFX11-FAKE16-NEXT: s_mov_b32 s32, 0
|
||||
; GFX11-FAKE16-NEXT: s_swappc_b64 s[30:31], s[2:3]
|
||||
; GFX11-FAKE16-NEXT: s_endpgm
|
||||
;
|
||||
; HSA-LABEL: test_call_external_void_func_i16_imm:
|
||||
; HSA: ; %bb.0:
|
||||
@ -1642,16 +1664,27 @@ define amdgpu_kernel void @test_call_external_void_func_f16_imm() #0 {
|
||||
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||||
; GFX9-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-LABEL: test_call_external_void_func_f16_imm:
|
||||
; GFX11: ; %bb.0:
|
||||
; GFX11-NEXT: v_mov_b32_e32 v0, 0x4400
|
||||
; GFX11-NEXT: s_getpc_b64 s[2:3]
|
||||
; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_f16@rel32@lo+4
|
||||
; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_f16@rel32@hi+12
|
||||
; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
|
||||
; GFX11-NEXT: s_mov_b32 s32, 0
|
||||
; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
|
||||
; GFX11-NEXT: s_endpgm
|
||||
; GFX11-TRUE16-LABEL: test_call_external_void_func_f16_imm:
|
||||
; GFX11-TRUE16: ; %bb.0:
|
||||
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x4400
|
||||
; GFX11-TRUE16-NEXT: s_getpc_b64 s[2:3]
|
||||
; GFX11-TRUE16-NEXT: s_add_u32 s2, s2, external_void_func_f16@rel32@lo+4
|
||||
; GFX11-TRUE16-NEXT: s_addc_u32 s3, s3, external_void_func_f16@rel32@hi+12
|
||||
; GFX11-TRUE16-NEXT: s_mov_b64 s[6:7], s[0:1]
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 s32, 0
|
||||
; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[2:3]
|
||||
; GFX11-TRUE16-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-FAKE16-LABEL: test_call_external_void_func_f16_imm:
|
||||
; GFX11-FAKE16: ; %bb.0:
|
||||
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0x4400
|
||||
; GFX11-FAKE16-NEXT: s_getpc_b64 s[2:3]
|
||||
; GFX11-FAKE16-NEXT: s_add_u32 s2, s2, external_void_func_f16@rel32@lo+4
|
||||
; GFX11-FAKE16-NEXT: s_addc_u32 s3, s3, external_void_func_f16@rel32@hi+12
|
||||
; GFX11-FAKE16-NEXT: s_mov_b64 s[6:7], s[0:1]
|
||||
; GFX11-FAKE16-NEXT: s_mov_b32 s32, 0
|
||||
; GFX11-FAKE16-NEXT: s_swappc_b64 s[30:31], s[2:3]
|
||||
; GFX11-FAKE16-NEXT: s_endpgm
|
||||
;
|
||||
; HSA-LABEL: test_call_external_void_func_f16_imm:
|
||||
; HSA: ; %bb.0:
|
||||
|
||||
@ -559,33 +559,61 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm(i32) #0 {
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX11-LABEL: test_call_external_void_func_i8_imm:
|
||||
; GFX11: ; %bb.0:
|
||||
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX11-NEXT: s_mov_b32 s0, s33
|
||||
; GFX11-NEXT: s_mov_b32 s33, s32
|
||||
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
|
||||
; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
|
||||
; GFX11-NEXT: s_mov_b32 exec_lo, s1
|
||||
; GFX11-NEXT: v_writelane_b32 v40, s0, 2
|
||||
; GFX11-NEXT: v_mov_b32_e32 v0, 0x7b
|
||||
; GFX11-NEXT: s_mov_b32 s1, external_void_func_i8@abs32@hi
|
||||
; GFX11-NEXT: s_mov_b32 s0, external_void_func_i8@abs32@lo
|
||||
; GFX11-NEXT: s_add_i32 s32, s32, 16
|
||||
; GFX11-NEXT: v_writelane_b32 v40, s30, 0
|
||||
; GFX11-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
|
||||
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX11-NEXT: v_readlane_b32 s31, v40, 1
|
||||
; GFX11-NEXT: v_readlane_b32 s30, v40, 0
|
||||
; GFX11-NEXT: s_mov_b32 s32, s33
|
||||
; GFX11-NEXT: v_readlane_b32 s0, v40, 2
|
||||
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
|
||||
; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
|
||||
; GFX11-NEXT: s_mov_b32 exec_lo, s1
|
||||
; GFX11-NEXT: s_mov_b32 s33, s0
|
||||
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
||||
; GFX11-TRUE16-LABEL: test_call_external_void_func_i8_imm:
|
||||
; GFX11-TRUE16: ; %bb.0:
|
||||
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 s0, s33
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 s33, s32
|
||||
; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1
|
||||
; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1
|
||||
; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s0, 2
|
||||
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x7b
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_i8@abs32@hi
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_i8@abs32@lo
|
||||
; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16
|
||||
; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0
|
||||
; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[0:1]
|
||||
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v40, 1
|
||||
; GFX11-TRUE16-NEXT: v_readlane_b32 s30, v40, 0
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 s32, s33
|
||||
; GFX11-TRUE16-NEXT: v_readlane_b32 s0, v40, 2
|
||||
; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1
|
||||
; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 s33, s0
|
||||
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX11-FAKE16-LABEL: test_call_external_void_func_i8_imm:
|
||||
; GFX11-FAKE16: ; %bb.0:
|
||||
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX11-FAKE16-NEXT: s_mov_b32 s0, s33
|
||||
; GFX11-FAKE16-NEXT: s_mov_b32 s33, s32
|
||||
; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1
|
||||
; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
|
||||
; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1
|
||||
; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s0, 2
|
||||
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0x7b
|
||||
; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_i8@abs32@hi
|
||||
; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_i8@abs32@lo
|
||||
; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16
|
||||
; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0
|
||||
; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; GFX11-FAKE16-NEXT: s_swappc_b64 s[30:31], s[0:1]
|
||||
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v40, 1
|
||||
; GFX11-FAKE16-NEXT: v_readlane_b32 s30, v40, 0
|
||||
; GFX11-FAKE16-NEXT: s_mov_b32 s32, s33
|
||||
; GFX11-FAKE16-NEXT: v_readlane_b32 s0, v40, 2
|
||||
; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1
|
||||
; GFX11-FAKE16-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
|
||||
; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1
|
||||
; GFX11-FAKE16-NEXT: s_mov_b32 s33, s0
|
||||
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX10-SCRATCH-LABEL: test_call_external_void_func_i8_imm:
|
||||
; GFX10-SCRATCH: ; %bb.0:
|
||||
@ -978,33 +1006,61 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm() #0 {
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX11-LABEL: test_call_external_void_func_i16_imm:
|
||||
; GFX11: ; %bb.0:
|
||||
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX11-NEXT: s_mov_b32 s0, s33
|
||||
; GFX11-NEXT: s_mov_b32 s33, s32
|
||||
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
|
||||
; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
|
||||
; GFX11-NEXT: s_mov_b32 exec_lo, s1
|
||||
; GFX11-NEXT: v_writelane_b32 v40, s0, 2
|
||||
; GFX11-NEXT: v_mov_b32_e32 v0, 0x7b
|
||||
; GFX11-NEXT: s_mov_b32 s1, external_void_func_i16@abs32@hi
|
||||
; GFX11-NEXT: s_mov_b32 s0, external_void_func_i16@abs32@lo
|
||||
; GFX11-NEXT: s_add_i32 s32, s32, 16
|
||||
; GFX11-NEXT: v_writelane_b32 v40, s30, 0
|
||||
; GFX11-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
|
||||
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX11-NEXT: v_readlane_b32 s31, v40, 1
|
||||
; GFX11-NEXT: v_readlane_b32 s30, v40, 0
|
||||
; GFX11-NEXT: s_mov_b32 s32, s33
|
||||
; GFX11-NEXT: v_readlane_b32 s0, v40, 2
|
||||
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
|
||||
; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
|
||||
; GFX11-NEXT: s_mov_b32 exec_lo, s1
|
||||
; GFX11-NEXT: s_mov_b32 s33, s0
|
||||
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
||||
; GFX11-TRUE16-LABEL: test_call_external_void_func_i16_imm:
|
||||
; GFX11-TRUE16: ; %bb.0:
|
||||
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 s0, s33
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 s33, s32
|
||||
; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1
|
||||
; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1
|
||||
; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s0, 2
|
||||
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x7b
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_i16@abs32@hi
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_i16@abs32@lo
|
||||
; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16
|
||||
; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0
|
||||
; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[0:1]
|
||||
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v40, 1
|
||||
; GFX11-TRUE16-NEXT: v_readlane_b32 s30, v40, 0
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 s32, s33
|
||||
; GFX11-TRUE16-NEXT: v_readlane_b32 s0, v40, 2
|
||||
; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1
|
||||
; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 s33, s0
|
||||
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX11-FAKE16-LABEL: test_call_external_void_func_i16_imm:
|
||||
; GFX11-FAKE16: ; %bb.0:
|
||||
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX11-FAKE16-NEXT: s_mov_b32 s0, s33
|
||||
; GFX11-FAKE16-NEXT: s_mov_b32 s33, s32
|
||||
; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1
|
||||
; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
|
||||
; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1
|
||||
; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s0, 2
|
||||
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0x7b
|
||||
; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_i16@abs32@hi
|
||||
; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_i16@abs32@lo
|
||||
; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16
|
||||
; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0
|
||||
; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; GFX11-FAKE16-NEXT: s_swappc_b64 s[30:31], s[0:1]
|
||||
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v40, 1
|
||||
; GFX11-FAKE16-NEXT: v_readlane_b32 s30, v40, 0
|
||||
; GFX11-FAKE16-NEXT: s_mov_b32 s32, s33
|
||||
; GFX11-FAKE16-NEXT: v_readlane_b32 s0, v40, 2
|
||||
; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1
|
||||
; GFX11-FAKE16-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
|
||||
; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1
|
||||
; GFX11-FAKE16-NEXT: s_mov_b32 s33, s0
|
||||
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX10-SCRATCH-LABEL: test_call_external_void_func_i16_imm:
|
||||
; GFX10-SCRATCH: ; %bb.0:
|
||||
@ -2161,33 +2217,61 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm() #0 {
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX11-LABEL: test_call_external_void_func_f16_imm:
|
||||
; GFX11: ; %bb.0:
|
||||
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX11-NEXT: s_mov_b32 s0, s33
|
||||
; GFX11-NEXT: s_mov_b32 s33, s32
|
||||
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
|
||||
; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
|
||||
; GFX11-NEXT: s_mov_b32 exec_lo, s1
|
||||
; GFX11-NEXT: v_writelane_b32 v40, s0, 2
|
||||
; GFX11-NEXT: v_mov_b32_e32 v0, 0x4400
|
||||
; GFX11-NEXT: s_mov_b32 s1, external_void_func_f16@abs32@hi
|
||||
; GFX11-NEXT: s_mov_b32 s0, external_void_func_f16@abs32@lo
|
||||
; GFX11-NEXT: s_add_i32 s32, s32, 16
|
||||
; GFX11-NEXT: v_writelane_b32 v40, s30, 0
|
||||
; GFX11-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
|
||||
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX11-NEXT: v_readlane_b32 s31, v40, 1
|
||||
; GFX11-NEXT: v_readlane_b32 s30, v40, 0
|
||||
; GFX11-NEXT: s_mov_b32 s32, s33
|
||||
; GFX11-NEXT: v_readlane_b32 s0, v40, 2
|
||||
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
|
||||
; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
|
||||
; GFX11-NEXT: s_mov_b32 exec_lo, s1
|
||||
; GFX11-NEXT: s_mov_b32 s33, s0
|
||||
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
||||
; GFX11-TRUE16-LABEL: test_call_external_void_func_f16_imm:
|
||||
; GFX11-TRUE16: ; %bb.0:
|
||||
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 s0, s33
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 s33, s32
|
||||
; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1
|
||||
; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1
|
||||
; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s0, 2
|
||||
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x4400
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_f16@abs32@hi
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_f16@abs32@lo
|
||||
; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16
|
||||
; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0
|
||||
; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[0:1]
|
||||
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v40, 1
|
||||
; GFX11-TRUE16-NEXT: v_readlane_b32 s30, v40, 0
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 s32, s33
|
||||
; GFX11-TRUE16-NEXT: v_readlane_b32 s0, v40, 2
|
||||
; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1
|
||||
; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 s33, s0
|
||||
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX11-FAKE16-LABEL: test_call_external_void_func_f16_imm:
|
||||
; GFX11-FAKE16: ; %bb.0:
|
||||
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX11-FAKE16-NEXT: s_mov_b32 s0, s33
|
||||
; GFX11-FAKE16-NEXT: s_mov_b32 s33, s32
|
||||
; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1
|
||||
; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
|
||||
; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1
|
||||
; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s0, 2
|
||||
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0x4400
|
||||
; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_f16@abs32@hi
|
||||
; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_f16@abs32@lo
|
||||
; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16
|
||||
; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0
|
||||
; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; GFX11-FAKE16-NEXT: s_swappc_b64 s[30:31], s[0:1]
|
||||
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v40, 1
|
||||
; GFX11-FAKE16-NEXT: v_readlane_b32 s30, v40, 0
|
||||
; GFX11-FAKE16-NEXT: s_mov_b32 s32, s33
|
||||
; GFX11-FAKE16-NEXT: v_readlane_b32 s0, v40, 2
|
||||
; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1
|
||||
; GFX11-FAKE16-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
|
||||
; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1
|
||||
; GFX11-FAKE16-NEXT: s_mov_b32 s33, s0
|
||||
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX10-SCRATCH-LABEL: test_call_external_void_func_f16_imm:
|
||||
; GFX10-SCRATCH: ; %bb.0:
|
||||
|
||||
@ -106,11 +106,17 @@ define amdgpu_gfx i16 @return_i16() #0 {
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, 10
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX10PLUS-LABEL: return_i16:
|
||||
; GFX10PLUS: ; %bb.0: ; %entry
|
||||
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10PLUS-NEXT: v_mov_b32_e32 v0, 10
|
||||
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
|
||||
; GFX10-LABEL: return_i16:
|
||||
; GFX10: ; %bb.0: ; %entry
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-NEXT: v_mov_b32_e32 v0, 10
|
||||
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX11-LABEL: return_i16:
|
||||
; GFX11: ; %bb.0: ; %entry
|
||||
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX11-NEXT: v_mov_b16_e32 v0.l, 10
|
||||
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
||||
entry:
|
||||
ret i16 10
|
||||
}
|
||||
|
||||
@ -19,16 +19,27 @@ define amdgpu_kernel void @store_inline_imm_neg_0.0_i16(ptr addrspace(1) %out) {
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
|
||||
; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
|
||||
;
|
||||
; GFX11-LABEL: store_inline_imm_neg_0.0_i16:
|
||||
; GFX11: ; %bb.0:
|
||||
; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
|
||||
; GFX11-NEXT: v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff]
|
||||
; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
|
||||
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
|
||||
; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
|
||||
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 dlc ; encoding: [0x00,0x20,0x64,0xe0,0x00,0x00,0x00,0x80]
|
||||
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
|
||||
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
|
||||
; GFX11-TRUE16-LABEL: store_inline_imm_neg_0.0_i16:
|
||||
; GFX11-TRUE16: ; %bb.0:
|
||||
; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
|
||||
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x8000 ; encoding: [0xff,0x38,0x00,0x7e,0x00,0x80,0xff,0xff]
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
|
||||
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
|
||||
; GFX11-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 dlc ; encoding: [0x00,0x20,0x64,0xe0,0x00,0x00,0x00,0x80]
|
||||
; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
|
||||
; GFX11-TRUE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
|
||||
;
|
||||
; GFX11-FAKE16-LABEL: store_inline_imm_neg_0.0_i16:
|
||||
; GFX11-FAKE16: ; %bb.0:
|
||||
; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
|
||||
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff]
|
||||
; GFX11-FAKE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
|
||||
; GFX11-FAKE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
|
||||
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
|
||||
; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 dlc ; encoding: [0x00,0x20,0x64,0xe0,0x00,0x00,0x00,0x80]
|
||||
; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
|
||||
; GFX11-FAKE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
|
||||
;
|
||||
; VI-LABEL: store_inline_imm_neg_0.0_i16:
|
||||
; VI: ; %bb.0:
|
||||
@ -66,15 +77,25 @@ define amdgpu_kernel void @store_inline_imm_0.0_f16(ptr addrspace(1) %out) {
|
||||
; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
|
||||
; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
|
||||
;
|
||||
; GFX11-LABEL: store_inline_imm_0.0_f16:
|
||||
; GFX11: ; %bb.0:
|
||||
; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
|
||||
; GFX11-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
|
||||
; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
|
||||
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
|
||||
; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
|
||||
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
|
||||
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
|
||||
; GFX11-TRUE16-LABEL: store_inline_imm_0.0_f16:
|
||||
; GFX11-TRUE16: ; %bb.0:
|
||||
; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
|
||||
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0 ; encoding: [0x80,0x38,0x00,0x7e]
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
|
||||
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
|
||||
; GFX11-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
|
||||
; GFX11-TRUE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
|
||||
;
|
||||
; GFX11-FAKE16-LABEL: store_inline_imm_0.0_f16:
|
||||
; GFX11-FAKE16: ; %bb.0:
|
||||
; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
|
||||
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
|
||||
; GFX11-FAKE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
|
||||
; GFX11-FAKE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
|
||||
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
|
||||
; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
|
||||
; GFX11-FAKE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
|
||||
;
|
||||
; VI-LABEL: store_inline_imm_0.0_f16:
|
||||
; VI: ; %bb.0:
|
||||
@ -110,15 +131,25 @@ define amdgpu_kernel void @store_imm_neg_0.0_f16(ptr addrspace(1) %out) {
|
||||
; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
|
||||
; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
|
||||
;
|
||||
; GFX11-LABEL: store_imm_neg_0.0_f16:
|
||||
; GFX11: ; %bb.0:
|
||||
; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
|
||||
; GFX11-NEXT: v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff]
|
||||
; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
|
||||
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
|
||||
; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
|
||||
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
|
||||
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
|
||||
; GFX11-TRUE16-LABEL: store_imm_neg_0.0_f16:
|
||||
; GFX11-TRUE16: ; %bb.0:
|
||||
; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
|
||||
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x8000 ; encoding: [0xff,0x38,0x00,0x7e,0x00,0x80,0xff,0xff]
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
|
||||
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
|
||||
; GFX11-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
|
||||
; GFX11-TRUE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
|
||||
;
|
||||
; GFX11-FAKE16-LABEL: store_imm_neg_0.0_f16:
|
||||
; GFX11-FAKE16: ; %bb.0:
|
||||
; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
|
||||
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff]
|
||||
; GFX11-FAKE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
|
||||
; GFX11-FAKE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
|
||||
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
|
||||
; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
|
||||
; GFX11-FAKE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
|
||||
;
|
||||
; VI-LABEL: store_imm_neg_0.0_f16:
|
||||
; VI: ; %bb.0:
|
||||
@ -154,15 +185,25 @@ define amdgpu_kernel void @store_inline_imm_0.5_f16(ptr addrspace(1) %out) {
|
||||
; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
|
||||
; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
|
||||
;
|
||||
; GFX11-LABEL: store_inline_imm_0.5_f16:
|
||||
; GFX11: ; %bb.0:
|
||||
; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
|
||||
; GFX11-NEXT: v_mov_b32_e32 v0, 0x3800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x38,0x00,0x00]
|
||||
; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
|
||||
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
|
||||
; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
|
||||
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
|
||||
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
|
||||
; GFX11-TRUE16-LABEL: store_inline_imm_0.5_f16:
|
||||
; GFX11-TRUE16: ; %bb.0:
|
||||
; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
|
||||
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x3800 ; encoding: [0xff,0x38,0x00,0x7e,0x00,0x38,0x00,0x00]
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
|
||||
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
|
||||
; GFX11-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
|
||||
; GFX11-TRUE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
|
||||
;
|
||||
; GFX11-FAKE16-LABEL: store_inline_imm_0.5_f16:
|
||||
; GFX11-FAKE16: ; %bb.0:
|
||||
; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
|
||||
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0x3800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x38,0x00,0x00]
|
||||
; GFX11-FAKE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
|
||||
; GFX11-FAKE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
|
||||
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
|
||||
; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
|
||||
; GFX11-FAKE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
|
||||
;
|
||||
; VI-LABEL: store_inline_imm_0.5_f16:
|
||||
; VI: ; %bb.0:
|
||||
@ -198,15 +239,25 @@ define amdgpu_kernel void @store_inline_imm_m_0.5_f16(ptr addrspace(1) %out) {
|
||||
; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
|
||||
; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
|
||||
;
|
||||
; GFX11-LABEL: store_inline_imm_m_0.5_f16:
|
||||
; GFX11: ; %bb.0:
|
||||
; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
|
||||
; GFX11-NEXT: v_mov_b32_e32 v0, 0xffffb800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xb8,0xff,0xff]
|
||||
; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
|
||||
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
|
||||
; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
|
||||
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
|
||||
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
|
||||
; GFX11-TRUE16-LABEL: store_inline_imm_m_0.5_f16:
|
||||
; GFX11-TRUE16: ; %bb.0:
|
||||
; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
|
||||
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0xb800 ; encoding: [0xff,0x38,0x00,0x7e,0x00,0xb8,0xff,0xff]
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
|
||||
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
|
||||
; GFX11-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
|
||||
; GFX11-TRUE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
|
||||
;
|
||||
; GFX11-FAKE16-LABEL: store_inline_imm_m_0.5_f16:
|
||||
; GFX11-FAKE16: ; %bb.0:
|
||||
; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
|
||||
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0xffffb800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xb8,0xff,0xff]
|
||||
; GFX11-FAKE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
|
||||
; GFX11-FAKE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
|
||||
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
|
||||
; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
|
||||
; GFX11-FAKE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
|
||||
;
|
||||
; VI-LABEL: store_inline_imm_m_0.5_f16:
|
||||
; VI: ; %bb.0:
|
||||
@ -242,15 +293,25 @@ define amdgpu_kernel void @store_inline_imm_1.0_f16(ptr addrspace(1) %out) {
|
||||
; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
|
||||
; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
|
||||
;
|
||||
; GFX11-LABEL: store_inline_imm_1.0_f16:
|
||||
; GFX11: ; %bb.0:
|
||||
; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
|
||||
; GFX11-NEXT: v_mov_b32_e32 v0, 0x3c00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x3c,0x00,0x00]
|
||||
; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
|
||||
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
|
||||
; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
|
||||
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
|
||||
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
|
||||
; GFX11-TRUE16-LABEL: store_inline_imm_1.0_f16:
|
||||
; GFX11-TRUE16: ; %bb.0:
|
||||
; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
|
||||
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x3c00 ; encoding: [0xff,0x38,0x00,0x7e,0x00,0x3c,0x00,0x00]
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
|
||||
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
|
||||
; GFX11-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
|
||||
; GFX11-TRUE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
|
||||
;
|
||||
; GFX11-FAKE16-LABEL: store_inline_imm_1.0_f16:
|
||||
; GFX11-FAKE16: ; %bb.0:
|
||||
; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
|
||||
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0x3c00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x3c,0x00,0x00]
|
||||
; GFX11-FAKE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
|
||||
; GFX11-FAKE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
|
||||
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
|
||||
; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
|
||||
; GFX11-FAKE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
|
||||
;
|
||||
; VI-LABEL: store_inline_imm_1.0_f16:
|
||||
; VI: ; %bb.0:
|
||||
@ -286,15 +347,25 @@ define amdgpu_kernel void @store_inline_imm_m_1.0_f16(ptr addrspace(1) %out) {
|
||||
; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
|
||||
; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
|
||||
;
|
||||
; GFX11-LABEL: store_inline_imm_m_1.0_f16:
|
||||
; GFX11: ; %bb.0:
|
||||
; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
|
||||
; GFX11-NEXT: v_mov_b32_e32 v0, 0xffffbc00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xbc,0xff,0xff]
|
||||
; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
|
||||
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
|
||||
; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
|
||||
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
|
||||
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
|
||||
; GFX11-TRUE16-LABEL: store_inline_imm_m_1.0_f16:
|
||||
; GFX11-TRUE16: ; %bb.0:
|
||||
; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
|
||||
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0xbc00 ; encoding: [0xff,0x38,0x00,0x7e,0x00,0xbc,0xff,0xff]
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
|
||||
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
|
||||
; GFX11-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
|
||||
; GFX11-TRUE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
|
||||
;
|
||||
; GFX11-FAKE16-LABEL: store_inline_imm_m_1.0_f16:
|
||||
; GFX11-FAKE16: ; %bb.0:
|
||||
; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
|
||||
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0xffffbc00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xbc,0xff,0xff]
|
||||
; GFX11-FAKE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
|
||||
; GFX11-FAKE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
|
||||
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
|
||||
; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
|
||||
; GFX11-FAKE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
|
||||
;
|
||||
; VI-LABEL: store_inline_imm_m_1.0_f16:
|
||||
; VI: ; %bb.0:
|
||||
@ -330,15 +401,25 @@ define amdgpu_kernel void @store_inline_imm_2.0_f16(ptr addrspace(1) %out) {
|
||||
; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
|
||||
; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
|
||||
;
|
||||
; GFX11-LABEL: store_inline_imm_2.0_f16:
|
||||
; GFX11: ; %bb.0:
|
||||
; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
|
||||
; GFX11-NEXT: v_mov_b32_e32 v0, 0x4000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x40,0x00,0x00]
|
||||
; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
|
||||
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
|
||||
; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
|
||||
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
|
||||
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
|
||||
; GFX11-TRUE16-LABEL: store_inline_imm_2.0_f16:
|
||||
; GFX11-TRUE16: ; %bb.0:
|
||||
; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
|
||||
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x4000 ; encoding: [0xff,0x38,0x00,0x7e,0x00,0x40,0x00,0x00]
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
|
||||
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
|
||||
; GFX11-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
|
||||
; GFX11-TRUE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
|
||||
;
|
||||
; GFX11-FAKE16-LABEL: store_inline_imm_2.0_f16:
|
||||
; GFX11-FAKE16: ; %bb.0:
|
||||
; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
|
||||
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0x4000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x40,0x00,0x00]
|
||||
; GFX11-FAKE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
|
||||
; GFX11-FAKE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
|
||||
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
|
||||
; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
|
||||
; GFX11-FAKE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
|
||||
;
|
||||
; VI-LABEL: store_inline_imm_2.0_f16:
|
||||
; VI: ; %bb.0:
|
||||
@ -374,15 +455,25 @@ define amdgpu_kernel void @store_inline_imm_m_2.0_f16(ptr addrspace(1) %out) {
|
||||
; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
|
||||
; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
|
||||
;
|
||||
; GFX11-LABEL: store_inline_imm_m_2.0_f16:
|
||||
; GFX11: ; %bb.0:
|
||||
; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
|
||||
; GFX11-NEXT: v_mov_b32_e32 v0, 0xffffc000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xc0,0xff,0xff]
|
||||
; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
|
||||
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
|
||||
; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
|
||||
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
|
||||
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
|
||||
; GFX11-TRUE16-LABEL: store_inline_imm_m_2.0_f16:
|
||||
; GFX11-TRUE16: ; %bb.0:
|
||||
; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
|
||||
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0xc000 ; encoding: [0xff,0x38,0x00,0x7e,0x00,0xc0,0xff,0xff]
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
|
||||
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
|
||||
; GFX11-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
|
||||
; GFX11-TRUE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
|
||||
;
|
||||
; GFX11-FAKE16-LABEL: store_inline_imm_m_2.0_f16:
|
||||
; GFX11-FAKE16: ; %bb.0:
|
||||
; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
|
||||
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0xffffc000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xc0,0xff,0xff]
|
||||
; GFX11-FAKE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
|
||||
; GFX11-FAKE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
|
||||
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
|
||||
; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
|
||||
; GFX11-FAKE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
|
||||
;
|
||||
; VI-LABEL: store_inline_imm_m_2.0_f16:
|
||||
; VI: ; %bb.0:
|
||||
@ -418,15 +509,25 @@ define amdgpu_kernel void @store_inline_imm_4.0_f16(ptr addrspace(1) %out) {
|
||||
; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
|
||||
; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
|
||||
;
|
||||
; GFX11-LABEL: store_inline_imm_4.0_f16:
|
||||
; GFX11: ; %bb.0:
|
||||
; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
|
||||
; GFX11-NEXT: v_mov_b32_e32 v0, 0x4400 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x44,0x00,0x00]
|
||||
; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
|
||||
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
|
||||
; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
|
||||
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
|
||||
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
|
||||
; GFX11-TRUE16-LABEL: store_inline_imm_4.0_f16:
|
||||
; GFX11-TRUE16: ; %bb.0:
|
||||
; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
|
||||
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x4400 ; encoding: [0xff,0x38,0x00,0x7e,0x00,0x44,0x00,0x00]
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
|
||||
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
|
||||
; GFX11-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
|
||||
; GFX11-TRUE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
|
||||
;
|
||||
; GFX11-FAKE16-LABEL: store_inline_imm_4.0_f16:
|
||||
; GFX11-FAKE16: ; %bb.0:
|
||||
; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
|
||||
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0x4400 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x44,0x00,0x00]
|
||||
; GFX11-FAKE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
|
||||
; GFX11-FAKE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
|
||||
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
|
||||
; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
|
||||
; GFX11-FAKE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
|
||||
;
|
||||
; VI-LABEL: store_inline_imm_4.0_f16:
|
||||
; VI: ; %bb.0:
|
||||
@ -462,15 +563,25 @@ define amdgpu_kernel void @store_inline_imm_m_4.0_f16(ptr addrspace(1) %out) {
|
||||
; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
|
||||
; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
|
||||
;
|
||||
; GFX11-LABEL: store_inline_imm_m_4.0_f16:
|
||||
; GFX11: ; %bb.0:
|
||||
; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
|
||||
; GFX11-NEXT: v_mov_b32_e32 v0, 0xffffc400 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xc4,0xff,0xff]
|
||||
; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
|
||||
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
|
||||
; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
|
||||
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
|
||||
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
|
||||
; GFX11-TRUE16-LABEL: store_inline_imm_m_4.0_f16:
|
||||
; GFX11-TRUE16: ; %bb.0:
|
||||
; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
|
||||
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0xc400 ; encoding: [0xff,0x38,0x00,0x7e,0x00,0xc4,0xff,0xff]
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
|
||||
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
|
||||
; GFX11-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
|
||||
; GFX11-TRUE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
|
||||
;
|
||||
; GFX11-FAKE16-LABEL: store_inline_imm_m_4.0_f16:
|
||||
; GFX11-FAKE16: ; %bb.0:
|
||||
; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
|
||||
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0xffffc400 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xc4,0xff,0xff]
|
||||
; GFX11-FAKE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
|
||||
; GFX11-FAKE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
|
||||
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
|
||||
; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
|
||||
; GFX11-FAKE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
|
||||
;
|
||||
; VI-LABEL: store_inline_imm_m_4.0_f16:
|
||||
; VI: ; %bb.0:
|
||||
@ -506,15 +617,25 @@ define amdgpu_kernel void @store_inline_imm_inv_2pi_f16(ptr addrspace(1) %out) {
|
||||
; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
|
||||
; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
|
||||
;
|
||||
; GFX11-LABEL: store_inline_imm_inv_2pi_f16:
|
||||
; GFX11: ; %bb.0:
|
||||
; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
|
||||
; GFX11-NEXT: v_mov_b32_e32 v0, 0x3118 ; encoding: [0xff,0x02,0x00,0x7e,0x18,0x31,0x00,0x00]
|
||||
; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
|
||||
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
|
||||
; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
|
||||
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
|
||||
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
|
||||
; GFX11-TRUE16-LABEL: store_inline_imm_inv_2pi_f16:
|
||||
; GFX11-TRUE16: ; %bb.0:
|
||||
; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
|
||||
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x3118 ; encoding: [0xff,0x38,0x00,0x7e,0x18,0x31,0x00,0x00]
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
|
||||
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
|
||||
; GFX11-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
|
||||
; GFX11-TRUE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
|
||||
;
|
||||
; GFX11-FAKE16-LABEL: store_inline_imm_inv_2pi_f16:
|
||||
; GFX11-FAKE16: ; %bb.0:
|
||||
; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
|
||||
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0x3118 ; encoding: [0xff,0x02,0x00,0x7e,0x18,0x31,0x00,0x00]
|
||||
; GFX11-FAKE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
|
||||
; GFX11-FAKE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
|
||||
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
|
||||
; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
|
||||
; GFX11-FAKE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
|
||||
;
|
||||
; VI-LABEL: store_inline_imm_inv_2pi_f16:
|
||||
; VI: ; %bb.0:
|
||||
@ -550,15 +671,25 @@ define amdgpu_kernel void @store_inline_imm_m_inv_2pi_f16(ptr addrspace(1) %out)
|
||||
; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
|
||||
; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
|
||||
;
|
||||
; GFX11-LABEL: store_inline_imm_m_inv_2pi_f16:
|
||||
; GFX11: ; %bb.0:
|
||||
; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
|
||||
; GFX11-NEXT: v_mov_b32_e32 v0, 0xffffb118 ; encoding: [0xff,0x02,0x00,0x7e,0x18,0xb1,0xff,0xff]
|
||||
; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
|
||||
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
|
||||
; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
|
||||
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
|
||||
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
|
||||
; GFX11-TRUE16-LABEL: store_inline_imm_m_inv_2pi_f16:
|
||||
; GFX11-TRUE16: ; %bb.0:
|
||||
; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
|
||||
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0xb118 ; encoding: [0xff,0x38,0x00,0x7e,0x18,0xb1,0xff,0xff]
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
|
||||
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
|
||||
; GFX11-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
|
||||
; GFX11-TRUE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
|
||||
;
|
||||
; GFX11-FAKE16-LABEL: store_inline_imm_m_inv_2pi_f16:
|
||||
; GFX11-FAKE16: ; %bb.0:
|
||||
; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
|
||||
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0xffffb118 ; encoding: [0xff,0x02,0x00,0x7e,0x18,0xb1,0xff,0xff]
|
||||
; GFX11-FAKE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
|
||||
; GFX11-FAKE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
|
||||
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
|
||||
; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
|
||||
; GFX11-FAKE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
|
||||
;
|
||||
; VI-LABEL: store_inline_imm_m_inv_2pi_f16:
|
||||
; VI: ; %bb.0:
|
||||
@ -594,15 +725,25 @@ define amdgpu_kernel void @store_literal_imm_f16(ptr addrspace(1) %out) {
|
||||
; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
|
||||
; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
|
||||
;
|
||||
; GFX11-LABEL: store_literal_imm_f16:
|
||||
; GFX11: ; %bb.0:
|
||||
; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
|
||||
; GFX11-NEXT: v_mov_b32_e32 v0, 0x6c00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x6c,0x00,0x00]
|
||||
; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
|
||||
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
|
||||
; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
|
||||
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
|
||||
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
|
||||
; GFX11-TRUE16-LABEL: store_literal_imm_f16:
|
||||
; GFX11-TRUE16: ; %bb.0:
|
||||
; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
|
||||
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x6c00 ; encoding: [0xff,0x38,0x00,0x7e,0x00,0x6c,0x00,0x00]
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
|
||||
; GFX11-TRUE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
|
||||
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
|
||||
; GFX11-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
|
||||
; GFX11-TRUE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
|
||||
;
|
||||
; GFX11-FAKE16-LABEL: store_literal_imm_f16:
|
||||
; GFX11-FAKE16: ; %bb.0:
|
||||
; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
|
||||
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0x6c00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x6c,0x00,0x00]
|
||||
; GFX11-FAKE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
|
||||
; GFX11-FAKE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
|
||||
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
|
||||
; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
|
||||
; GFX11-FAKE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
|
||||
;
|
||||
; VI-LABEL: store_literal_imm_f16:
|
||||
; VI: ; %bb.0:
|
||||
|
||||
@ -358,14 +358,15 @@ main_body:
|
||||
define amdgpu_ps half @v_interp_f16_imm_params(float inreg %i, float inreg %j) #0 {
|
||||
; GFX11-TRUE16-LABEL: v_interp_f16_imm_params:
|
||||
; GFX11-TRUE16: ; %bb.0: ; %main_body
|
||||
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0
|
||||
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, s0 :: v_dual_mov_b32 v2, 0
|
||||
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s0
|
||||
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v3, s1
|
||||
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||
; GFX11-TRUE16-NEXT: v_interp_p10_f16_f32 v1, v0.l, v1, v0.l wait_exp:7
|
||||
; GFX11-TRUE16-NEXT: v_interp_p2_f16_f32 v0.l, v0.l, v3, v2 wait_exp:7
|
||||
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, v1
|
||||
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
|
||||
; GFX11-TRUE16-NEXT: v_interp_p10_f16_f32 v2, v0.l, v2, v0.l wait_exp:7
|
||||
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||
; GFX11-TRUE16-NEXT: v_interp_p2_f16_f32 v0.l, v0.l, v3, v1 wait_exp:7
|
||||
; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, v2
|
||||
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX11-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.h, v0.l
|
||||
; GFX11-TRUE16-NEXT: ; return to shader part epilog
|
||||
;
|
||||
@ -383,14 +384,15 @@ define amdgpu_ps half @v_interp_f16_imm_params(float inreg %i, float inreg %j) #
|
||||
;
|
||||
; GFX12-TRUE16-LABEL: v_interp_f16_imm_params:
|
||||
; GFX12-TRUE16: ; %bb.0: ; %main_body
|
||||
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0
|
||||
; GFX12-TRUE16-NEXT: v_dual_mov_b32 v1, s0 :: v_dual_mov_b32 v2, 0
|
||||
; GFX12-TRUE16-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s0
|
||||
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v3, s1
|
||||
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||
; GFX12-TRUE16-NEXT: v_interp_p10_f16_f32 v1, v0.l, v1, v0.l wait_exp:7
|
||||
; GFX12-TRUE16-NEXT: v_interp_p2_f16_f32 v0.l, v0.l, v3, v2 wait_exp:7
|
||||
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX12-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, v1
|
||||
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
|
||||
; GFX12-TRUE16-NEXT: v_interp_p10_f16_f32 v2, v0.l, v2, v0.l wait_exp:7
|
||||
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||
; GFX12-TRUE16-NEXT: v_interp_p2_f16_f32 v0.l, v0.l, v3, v1 wait_exp:7
|
||||
; GFX12-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, v2
|
||||
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.h, v0.l
|
||||
; GFX12-TRUE16-NEXT: ; return to shader part epilog
|
||||
;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user