Compare commits

...

1 Commits

Author SHA1 Message Date
Matt Arsenault
f594b23678 AMDGPU: Replace copy-to-mov-imm folding logic with class compat checks
This strengthens the check to ensure the new mov's source class
is compatible with the source register. This avoids using the register
sized based checks in getMovOpcode, which don't quite understand
AV superclasses correctly. As a side effect it also enables more folds
into true16 movs.

getMovOpcode should probably be deleted, or at least replaced
with class check based logic. In this particular case other
legality checks need to be mixed in with attempted IR changes,
so I didn't try to push all of that into the opcode selection.
2025-08-20 18:57:15 +09:00
7 changed files with 575 additions and 292 deletions

View File

@ -1248,6 +1248,7 @@ void SIFoldOperandsImpl::foldOperand(
if (FoldingImmLike && UseMI->isCopy()) {
Register DestReg = UseMI->getOperand(0).getReg();
Register SrcReg = UseMI->getOperand(1).getReg();
unsigned UseSubReg = UseMI->getOperand(1).getSubReg();
assert(SrcReg.isVirtual());
const TargetRegisterClass *SrcRC = MRI->getRegClass(SrcReg);
@ -1278,44 +1279,60 @@ void SIFoldOperandsImpl::foldOperand(
DestRC = &AMDGPU::SGPR_32RegClass;
}
// In order to fold immediates into copies, we need to change the
// copy to a MOV.
// In order to fold immediates into copies, we need to change the copy to a
// MOV. Find a compatible mov instruction with the value.
for (unsigned MovOp :
{AMDGPU::S_MOV_B32, AMDGPU::V_MOV_B32_e32, AMDGPU::S_MOV_B64,
AMDGPU::V_MOV_B64_PSEUDO, AMDGPU::V_MOV_B16_t16_e64}) {
const MCInstrDesc &MovDesc = TII->get(MovOp);
assert(MovDesc.getNumDefs() > 0 && MovDesc.operands()[0].RegClass != -1);
unsigned MovOp = TII->getMovOpcode(DestRC);
if (MovOp == AMDGPU::COPY)
const TargetRegisterClass *MovDstRC =
TRI->getRegClass(MovDesc.operands()[0].RegClass);
// Fold if the destination register class of the MOV instruction (ResRC)
// is a superclass of (or equal to) the destination register class of the
// COPY (DestRC). If this condition fails, folding would be illegal.
if (!DestRC->hasSuperClassEq(MovDstRC))
continue;
const int SrcIdx = MovOp == AMDGPU::V_MOV_B16_t16_e64 ? 2 : 1;
const TargetRegisterClass *MovSrcRC =
TRI->getRegClass(MovDesc.operands()[SrcIdx].RegClass);
if (UseSubReg)
MovSrcRC = TRI->getMatchingSuperRegClass(SrcRC, MovSrcRC, UseSubReg);
if (!MRI->constrainRegClass(SrcReg, MovSrcRC))
break;
MachineInstr::mop_iterator ImpOpI = UseMI->implicit_operands().begin();
MachineInstr::mop_iterator ImpOpE = UseMI->implicit_operands().end();
while (ImpOpI != ImpOpE) {
MachineInstr::mop_iterator Tmp = ImpOpI;
ImpOpI++;
UseMI->removeOperand(UseMI->getOperandNo(Tmp));
}
UseMI->setDesc(MovDesc);
if (MovOp == AMDGPU::V_MOV_B16_t16_e64) {
const auto &SrcOp = UseMI->getOperand(UseOpIdx);
MachineOperand NewSrcOp(SrcOp);
MachineFunction *MF = UseMI->getParent()->getParent();
UseMI->removeOperand(1);
UseMI->addOperand(*MF, MachineOperand::CreateImm(0)); // src0_modifiers
UseMI->addOperand(NewSrcOp); // src0
UseMI->addOperand(*MF, MachineOperand::CreateImm(0)); // op_sel
UseOpIdx = SrcIdx;
UseOp = &UseMI->getOperand(UseOpIdx);
}
CopiesToReplace.push_back(UseMI);
break;
}
// We failed to replace the copy, so give up.
if (UseMI->getOpcode() == AMDGPU::COPY)
return;
// Fold if the destination register class of the MOV instruction (ResRC)
// is a superclass of (or equal to) the destination register class of the
// COPY (DestRC). If this condition fails, folding would be illegal.
const MCInstrDesc &MovDesc = TII->get(MovOp);
assert(MovDesc.getNumDefs() > 0 && MovDesc.operands()[0].RegClass != -1);
const TargetRegisterClass *ResRC =
TRI->getRegClass(MovDesc.operands()[0].RegClass);
if (!DestRC->hasSuperClassEq(ResRC))
return;
MachineInstr::mop_iterator ImpOpI = UseMI->implicit_operands().begin();
MachineInstr::mop_iterator ImpOpE = UseMI->implicit_operands().end();
while (ImpOpI != ImpOpE) {
MachineInstr::mop_iterator Tmp = ImpOpI;
ImpOpI++;
UseMI->removeOperand(UseMI->getOperandNo(Tmp));
}
UseMI->setDesc(TII->get(MovOp));
if (MovOp == AMDGPU::V_MOV_B16_t16_e64) {
const auto &SrcOp = UseMI->getOperand(UseOpIdx);
MachineOperand NewSrcOp(SrcOp);
MachineFunction *MF = UseMI->getParent()->getParent();
UseMI->removeOperand(1);
UseMI->addOperand(*MF, MachineOperand::CreateImm(0)); // src0_modifiers
UseMI->addOperand(NewSrcOp); // src0
UseMI->addOperand(*MF, MachineOperand::CreateImm(0)); // op_sel
UseOpIdx = 2;
UseOp = &UseMI->getOperand(UseOpIdx);
}
CopiesToReplace.push_back(UseMI);
} else {
if (UseMI->isCopy() && OpToFold.isReg() &&
UseMI->getOperand(0).getReg().isVirtual() &&

View File

@ -197,7 +197,7 @@ define amdgpu_kernel void @br_cc_f16_imm_a(
; GFX11-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0.5, v1.l
; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB1_2
; GFX11-TRUE16-NEXT: ; %bb.1: ; %one
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, 0x3800
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x3800
; GFX11-TRUE16-NEXT: .LBB1_2: ; %two
; GFX11-TRUE16-NEXT: s_mov_b32 s2, s6
; GFX11-TRUE16-NEXT: s_mov_b32 s3, s7
@ -303,7 +303,7 @@ define amdgpu_kernel void @br_cc_f16_imm_b(
; GFX11-TRUE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, 0.5, v1.l
; GFX11-TRUE16-NEXT: s_cbranch_vccz .LBB2_2
; GFX11-TRUE16-NEXT: ; %bb.1: ; %two
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, 0x3800
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x3800
; GFX11-TRUE16-NEXT: .LBB2_2: ; %one
; GFX11-TRUE16-NEXT: s_mov_b32 s2, s6
; GFX11-TRUE16-NEXT: s_mov_b32 s3, s7

View File

@ -426,16 +426,27 @@ define amdgpu_kernel void @test_call_external_void_func_i8_imm(i32) #0 {
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT: s_endpgm
;
; GFX11-LABEL: test_call_external_void_func_i8_imm:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_mov_b32_e32 v0, 0x7b
; GFX11-NEXT: s_getpc_b64 s[2:3]
; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_i8@rel32@lo+4
; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_i8@rel32@hi+12
; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
; GFX11-NEXT: s_mov_b32 s32, 0
; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
; GFX11-NEXT: s_endpgm
; GFX11-TRUE16-LABEL: test_call_external_void_func_i8_imm:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x7b
; GFX11-TRUE16-NEXT: s_getpc_b64 s[2:3]
; GFX11-TRUE16-NEXT: s_add_u32 s2, s2, external_void_func_i8@rel32@lo+4
; GFX11-TRUE16-NEXT: s_addc_u32 s3, s3, external_void_func_i8@rel32@hi+12
; GFX11-TRUE16-NEXT: s_mov_b64 s[6:7], s[0:1]
; GFX11-TRUE16-NEXT: s_mov_b32 s32, 0
; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[2:3]
; GFX11-TRUE16-NEXT: s_endpgm
;
; GFX11-FAKE16-LABEL: test_call_external_void_func_i8_imm:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0x7b
; GFX11-FAKE16-NEXT: s_getpc_b64 s[2:3]
; GFX11-FAKE16-NEXT: s_add_u32 s2, s2, external_void_func_i8@rel32@lo+4
; GFX11-FAKE16-NEXT: s_addc_u32 s3, s3, external_void_func_i8@rel32@hi+12
; GFX11-FAKE16-NEXT: s_mov_b64 s[6:7], s[0:1]
; GFX11-FAKE16-NEXT: s_mov_b32 s32, 0
; GFX11-FAKE16-NEXT: s_swappc_b64 s[30:31], s[2:3]
; GFX11-FAKE16-NEXT: s_endpgm
;
; HSA-LABEL: test_call_external_void_func_i8_imm:
; HSA: ; %bb.0:
@ -723,16 +734,27 @@ define amdgpu_kernel void @test_call_external_void_func_i16_imm() #0 {
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT: s_endpgm
;
; GFX11-LABEL: test_call_external_void_func_i16_imm:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_mov_b32_e32 v0, 0x7b
; GFX11-NEXT: s_getpc_b64 s[2:3]
; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_i16@rel32@lo+4
; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_i16@rel32@hi+12
; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
; GFX11-NEXT: s_mov_b32 s32, 0
; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
; GFX11-NEXT: s_endpgm
; GFX11-TRUE16-LABEL: test_call_external_void_func_i16_imm:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x7b
; GFX11-TRUE16-NEXT: s_getpc_b64 s[2:3]
; GFX11-TRUE16-NEXT: s_add_u32 s2, s2, external_void_func_i16@rel32@lo+4
; GFX11-TRUE16-NEXT: s_addc_u32 s3, s3, external_void_func_i16@rel32@hi+12
; GFX11-TRUE16-NEXT: s_mov_b64 s[6:7], s[0:1]
; GFX11-TRUE16-NEXT: s_mov_b32 s32, 0
; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[2:3]
; GFX11-TRUE16-NEXT: s_endpgm
;
; GFX11-FAKE16-LABEL: test_call_external_void_func_i16_imm:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0x7b
; GFX11-FAKE16-NEXT: s_getpc_b64 s[2:3]
; GFX11-FAKE16-NEXT: s_add_u32 s2, s2, external_void_func_i16@rel32@lo+4
; GFX11-FAKE16-NEXT: s_addc_u32 s3, s3, external_void_func_i16@rel32@hi+12
; GFX11-FAKE16-NEXT: s_mov_b64 s[6:7], s[0:1]
; GFX11-FAKE16-NEXT: s_mov_b32 s32, 0
; GFX11-FAKE16-NEXT: s_swappc_b64 s[30:31], s[2:3]
; GFX11-FAKE16-NEXT: s_endpgm
;
; HSA-LABEL: test_call_external_void_func_i16_imm:
; HSA: ; %bb.0:
@ -1642,16 +1664,27 @@ define amdgpu_kernel void @test_call_external_void_func_f16_imm() #0 {
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT: s_endpgm
;
; GFX11-LABEL: test_call_external_void_func_f16_imm:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_mov_b32_e32 v0, 0x4400
; GFX11-NEXT: s_getpc_b64 s[2:3]
; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_f16@rel32@lo+4
; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_f16@rel32@hi+12
; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
; GFX11-NEXT: s_mov_b32 s32, 0
; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
; GFX11-NEXT: s_endpgm
; GFX11-TRUE16-LABEL: test_call_external_void_func_f16_imm:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x4400
; GFX11-TRUE16-NEXT: s_getpc_b64 s[2:3]
; GFX11-TRUE16-NEXT: s_add_u32 s2, s2, external_void_func_f16@rel32@lo+4
; GFX11-TRUE16-NEXT: s_addc_u32 s3, s3, external_void_func_f16@rel32@hi+12
; GFX11-TRUE16-NEXT: s_mov_b64 s[6:7], s[0:1]
; GFX11-TRUE16-NEXT: s_mov_b32 s32, 0
; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[2:3]
; GFX11-TRUE16-NEXT: s_endpgm
;
; GFX11-FAKE16-LABEL: test_call_external_void_func_f16_imm:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0x4400
; GFX11-FAKE16-NEXT: s_getpc_b64 s[2:3]
; GFX11-FAKE16-NEXT: s_add_u32 s2, s2, external_void_func_f16@rel32@lo+4
; GFX11-FAKE16-NEXT: s_addc_u32 s3, s3, external_void_func_f16@rel32@hi+12
; GFX11-FAKE16-NEXT: s_mov_b64 s[6:7], s[0:1]
; GFX11-FAKE16-NEXT: s_mov_b32 s32, 0
; GFX11-FAKE16-NEXT: s_swappc_b64 s[30:31], s[2:3]
; GFX11-FAKE16-NEXT: s_endpgm
;
; HSA-LABEL: test_call_external_void_func_f16_imm:
; HSA: ; %bb.0:

View File

@ -559,33 +559,61 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm(i32) #0 {
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: test_call_external_void_func_i8_imm:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
; GFX11-NEXT: s_mov_b32 exec_lo, s1
; GFX11-NEXT: v_writelane_b32 v40, s0, 2
; GFX11-NEXT: v_mov_b32_e32 v0, 0x7b
; GFX11-NEXT: s_mov_b32 s1, external_void_func_i8@abs32@hi
; GFX11-NEXT: s_mov_b32 s0, external_void_func_i8@abs32@lo
; GFX11-NEXT: s_add_i32 s32, s32, 16
; GFX11-NEXT: v_writelane_b32 v40, s30, 0
; GFX11-NEXT: v_writelane_b32 v40, s31, 1
; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_readlane_b32 s31, v40, 1
; GFX11-NEXT: v_readlane_b32 s30, v40, 0
; GFX11-NEXT: s_mov_b32 s32, s33
; GFX11-NEXT: v_readlane_b32 s0, v40, 2
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
; GFX11-NEXT: s_mov_b32 exec_lo, s1
; GFX11-NEXT: s_mov_b32 s33, s0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-TRUE16-LABEL: test_call_external_void_func_i8_imm:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-TRUE16-NEXT: s_mov_b32 s0, s33
; GFX11-TRUE16-NEXT: s_mov_b32 s33, s32
; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1
; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1
; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s0, 2
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x7b
; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_i8@abs32@hi
; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_i8@abs32@lo
; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16
; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0
; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1
; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v40, 1
; GFX11-TRUE16-NEXT: v_readlane_b32 s30, v40, 0
; GFX11-TRUE16-NEXT: s_mov_b32 s32, s33
; GFX11-TRUE16-NEXT: v_readlane_b32 s0, v40, 2
; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1
; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1
; GFX11-TRUE16-NEXT: s_mov_b32 s33, s0
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: test_call_external_void_func_i8_imm:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-FAKE16-NEXT: s_mov_b32 s0, s33
; GFX11-FAKE16-NEXT: s_mov_b32 s33, s32
; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1
; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1
; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s0, 2
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0x7b
; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_i8@abs32@hi
; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_i8@abs32@lo
; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16
; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0
; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1
; GFX11-FAKE16-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v40, 1
; GFX11-FAKE16-NEXT: v_readlane_b32 s30, v40, 0
; GFX11-FAKE16-NEXT: s_mov_b32 s32, s33
; GFX11-FAKE16-NEXT: v_readlane_b32 s0, v40, 2
; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1
; GFX11-FAKE16-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1
; GFX11-FAKE16-NEXT: s_mov_b32 s33, s0
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_i8_imm:
; GFX10-SCRATCH: ; %bb.0:
@ -978,33 +1006,61 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm() #0 {
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: test_call_external_void_func_i16_imm:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
; GFX11-NEXT: s_mov_b32 exec_lo, s1
; GFX11-NEXT: v_writelane_b32 v40, s0, 2
; GFX11-NEXT: v_mov_b32_e32 v0, 0x7b
; GFX11-NEXT: s_mov_b32 s1, external_void_func_i16@abs32@hi
; GFX11-NEXT: s_mov_b32 s0, external_void_func_i16@abs32@lo
; GFX11-NEXT: s_add_i32 s32, s32, 16
; GFX11-NEXT: v_writelane_b32 v40, s30, 0
; GFX11-NEXT: v_writelane_b32 v40, s31, 1
; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_readlane_b32 s31, v40, 1
; GFX11-NEXT: v_readlane_b32 s30, v40, 0
; GFX11-NEXT: s_mov_b32 s32, s33
; GFX11-NEXT: v_readlane_b32 s0, v40, 2
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
; GFX11-NEXT: s_mov_b32 exec_lo, s1
; GFX11-NEXT: s_mov_b32 s33, s0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-TRUE16-LABEL: test_call_external_void_func_i16_imm:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-TRUE16-NEXT: s_mov_b32 s0, s33
; GFX11-TRUE16-NEXT: s_mov_b32 s33, s32
; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1
; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1
; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s0, 2
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x7b
; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_i16@abs32@hi
; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_i16@abs32@lo
; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16
; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0
; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1
; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v40, 1
; GFX11-TRUE16-NEXT: v_readlane_b32 s30, v40, 0
; GFX11-TRUE16-NEXT: s_mov_b32 s32, s33
; GFX11-TRUE16-NEXT: v_readlane_b32 s0, v40, 2
; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1
; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1
; GFX11-TRUE16-NEXT: s_mov_b32 s33, s0
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: test_call_external_void_func_i16_imm:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-FAKE16-NEXT: s_mov_b32 s0, s33
; GFX11-FAKE16-NEXT: s_mov_b32 s33, s32
; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1
; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1
; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s0, 2
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0x7b
; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_i16@abs32@hi
; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_i16@abs32@lo
; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16
; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0
; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1
; GFX11-FAKE16-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v40, 1
; GFX11-FAKE16-NEXT: v_readlane_b32 s30, v40, 0
; GFX11-FAKE16-NEXT: s_mov_b32 s32, s33
; GFX11-FAKE16-NEXT: v_readlane_b32 s0, v40, 2
; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1
; GFX11-FAKE16-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1
; GFX11-FAKE16-NEXT: s_mov_b32 s33, s0
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_i16_imm:
; GFX10-SCRATCH: ; %bb.0:
@ -2161,33 +2217,61 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm() #0 {
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: test_call_external_void_func_f16_imm:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
; GFX11-NEXT: s_mov_b32 exec_lo, s1
; GFX11-NEXT: v_writelane_b32 v40, s0, 2
; GFX11-NEXT: v_mov_b32_e32 v0, 0x4400
; GFX11-NEXT: s_mov_b32 s1, external_void_func_f16@abs32@hi
; GFX11-NEXT: s_mov_b32 s0, external_void_func_f16@abs32@lo
; GFX11-NEXT: s_add_i32 s32, s32, 16
; GFX11-NEXT: v_writelane_b32 v40, s30, 0
; GFX11-NEXT: v_writelane_b32 v40, s31, 1
; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_readlane_b32 s31, v40, 1
; GFX11-NEXT: v_readlane_b32 s30, v40, 0
; GFX11-NEXT: s_mov_b32 s32, s33
; GFX11-NEXT: v_readlane_b32 s0, v40, 2
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
; GFX11-NEXT: s_mov_b32 exec_lo, s1
; GFX11-NEXT: s_mov_b32 s33, s0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-TRUE16-LABEL: test_call_external_void_func_f16_imm:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-TRUE16-NEXT: s_mov_b32 s0, s33
; GFX11-TRUE16-NEXT: s_mov_b32 s33, s32
; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1
; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1
; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s0, 2
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x4400
; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_f16@abs32@hi
; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_f16@abs32@lo
; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16
; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0
; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1
; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v40, 1
; GFX11-TRUE16-NEXT: v_readlane_b32 s30, v40, 0
; GFX11-TRUE16-NEXT: s_mov_b32 s32, s33
; GFX11-TRUE16-NEXT: v_readlane_b32 s0, v40, 2
; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1
; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1
; GFX11-TRUE16-NEXT: s_mov_b32 s33, s0
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: test_call_external_void_func_f16_imm:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-FAKE16-NEXT: s_mov_b32 s0, s33
; GFX11-FAKE16-NEXT: s_mov_b32 s33, s32
; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1
; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1
; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s0, 2
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0x4400
; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_f16@abs32@hi
; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_f16@abs32@lo
; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16
; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0
; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1
; GFX11-FAKE16-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v40, 1
; GFX11-FAKE16-NEXT: v_readlane_b32 s30, v40, 0
; GFX11-FAKE16-NEXT: s_mov_b32 s32, s33
; GFX11-FAKE16-NEXT: v_readlane_b32 s0, v40, 2
; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1
; GFX11-FAKE16-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1
; GFX11-FAKE16-NEXT: s_mov_b32 s33, s0
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_f16_imm:
; GFX10-SCRATCH: ; %bb.0:

View File

@ -106,11 +106,17 @@ define amdgpu_gfx i16 @return_i16() #0 {
; GFX9-NEXT: v_mov_b32_e32 v0, 10
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: return_i16:
; GFX10PLUS: ; %bb.0: ; %entry
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT: v_mov_b32_e32 v0, 10
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: return_i16:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, 10
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: return_i16:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_mov_b16_e32 v0.l, 10
; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
ret i16 10
}

View File

@ -19,16 +19,27 @@ define amdgpu_kernel void @store_inline_imm_neg_0.0_i16(ptr addrspace(1) %out) {
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; GFX11-LABEL: store_inline_imm_neg_0.0_i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
; GFX11-NEXT: v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff]
; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 dlc ; encoding: [0x00,0x20,0x64,0xe0,0x00,0x00,0x00,0x80]
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
; GFX11-TRUE16-LABEL: store_inline_imm_neg_0.0_i16:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x8000 ; encoding: [0xff,0x38,0x00,0x7e,0x00,0x80,0xff,0xff]
; GFX11-TRUE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX11-TRUE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 dlc ; encoding: [0x00,0x20,0x64,0xe0,0x00,0x00,0x00,0x80]
; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-TRUE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; GFX11-FAKE16-LABEL: store_inline_imm_neg_0.0_i16:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff]
; GFX11-FAKE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX11-FAKE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 dlc ; encoding: [0x00,0x20,0x64,0xe0,0x00,0x00,0x00,0x80]
; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-FAKE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; VI-LABEL: store_inline_imm_neg_0.0_i16:
; VI: ; %bb.0:
@ -66,15 +77,25 @@ define amdgpu_kernel void @store_inline_imm_0.0_f16(ptr addrspace(1) %out) {
; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; GFX11-LABEL: store_inline_imm_0.0_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
; GFX11-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
; GFX11-TRUE16-LABEL: store_inline_imm_0.0_f16:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0 ; encoding: [0x80,0x38,0x00,0x7e]
; GFX11-TRUE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX11-TRUE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
; GFX11-TRUE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; GFX11-FAKE16-LABEL: store_inline_imm_0.0_f16:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
; GFX11-FAKE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX11-FAKE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
; GFX11-FAKE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; VI-LABEL: store_inline_imm_0.0_f16:
; VI: ; %bb.0:
@ -110,15 +131,25 @@ define amdgpu_kernel void @store_imm_neg_0.0_f16(ptr addrspace(1) %out) {
; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; GFX11-LABEL: store_imm_neg_0.0_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
; GFX11-NEXT: v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff]
; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
; GFX11-TRUE16-LABEL: store_imm_neg_0.0_f16:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x8000 ; encoding: [0xff,0x38,0x00,0x7e,0x00,0x80,0xff,0xff]
; GFX11-TRUE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX11-TRUE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
; GFX11-TRUE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; GFX11-FAKE16-LABEL: store_imm_neg_0.0_f16:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff]
; GFX11-FAKE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX11-FAKE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
; GFX11-FAKE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; VI-LABEL: store_imm_neg_0.0_f16:
; VI: ; %bb.0:
@ -154,15 +185,25 @@ define amdgpu_kernel void @store_inline_imm_0.5_f16(ptr addrspace(1) %out) {
; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; GFX11-LABEL: store_inline_imm_0.5_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
; GFX11-NEXT: v_mov_b32_e32 v0, 0x3800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x38,0x00,0x00]
; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
; GFX11-TRUE16-LABEL: store_inline_imm_0.5_f16:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x3800 ; encoding: [0xff,0x38,0x00,0x7e,0x00,0x38,0x00,0x00]
; GFX11-TRUE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX11-TRUE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
; GFX11-TRUE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; GFX11-FAKE16-LABEL: store_inline_imm_0.5_f16:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0x3800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x38,0x00,0x00]
; GFX11-FAKE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX11-FAKE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
; GFX11-FAKE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; VI-LABEL: store_inline_imm_0.5_f16:
; VI: ; %bb.0:
@ -198,15 +239,25 @@ define amdgpu_kernel void @store_inline_imm_m_0.5_f16(ptr addrspace(1) %out) {
; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; GFX11-LABEL: store_inline_imm_m_0.5_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
; GFX11-NEXT: v_mov_b32_e32 v0, 0xffffb800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xb8,0xff,0xff]
; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
; GFX11-TRUE16-LABEL: store_inline_imm_m_0.5_f16:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0xb800 ; encoding: [0xff,0x38,0x00,0x7e,0x00,0xb8,0xff,0xff]
; GFX11-TRUE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX11-TRUE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
; GFX11-TRUE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; GFX11-FAKE16-LABEL: store_inline_imm_m_0.5_f16:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0xffffb800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xb8,0xff,0xff]
; GFX11-FAKE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX11-FAKE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
; GFX11-FAKE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; VI-LABEL: store_inline_imm_m_0.5_f16:
; VI: ; %bb.0:
@ -242,15 +293,25 @@ define amdgpu_kernel void @store_inline_imm_1.0_f16(ptr addrspace(1) %out) {
; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; GFX11-LABEL: store_inline_imm_1.0_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
; GFX11-NEXT: v_mov_b32_e32 v0, 0x3c00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x3c,0x00,0x00]
; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
; GFX11-TRUE16-LABEL: store_inline_imm_1.0_f16:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x3c00 ; encoding: [0xff,0x38,0x00,0x7e,0x00,0x3c,0x00,0x00]
; GFX11-TRUE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX11-TRUE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
; GFX11-TRUE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; GFX11-FAKE16-LABEL: store_inline_imm_1.0_f16:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0x3c00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x3c,0x00,0x00]
; GFX11-FAKE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX11-FAKE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
; GFX11-FAKE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; VI-LABEL: store_inline_imm_1.0_f16:
; VI: ; %bb.0:
@ -286,15 +347,25 @@ define amdgpu_kernel void @store_inline_imm_m_1.0_f16(ptr addrspace(1) %out) {
; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; GFX11-LABEL: store_inline_imm_m_1.0_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
; GFX11-NEXT: v_mov_b32_e32 v0, 0xffffbc00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xbc,0xff,0xff]
; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
; GFX11-TRUE16-LABEL: store_inline_imm_m_1.0_f16:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0xbc00 ; encoding: [0xff,0x38,0x00,0x7e,0x00,0xbc,0xff,0xff]
; GFX11-TRUE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX11-TRUE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
; GFX11-TRUE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; GFX11-FAKE16-LABEL: store_inline_imm_m_1.0_f16:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0xffffbc00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xbc,0xff,0xff]
; GFX11-FAKE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX11-FAKE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
; GFX11-FAKE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; VI-LABEL: store_inline_imm_m_1.0_f16:
; VI: ; %bb.0:
@ -330,15 +401,25 @@ define amdgpu_kernel void @store_inline_imm_2.0_f16(ptr addrspace(1) %out) {
; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; GFX11-LABEL: store_inline_imm_2.0_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
; GFX11-NEXT: v_mov_b32_e32 v0, 0x4000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x40,0x00,0x00]
; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
; GFX11-TRUE16-LABEL: store_inline_imm_2.0_f16:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x4000 ; encoding: [0xff,0x38,0x00,0x7e,0x00,0x40,0x00,0x00]
; GFX11-TRUE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX11-TRUE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
; GFX11-TRUE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; GFX11-FAKE16-LABEL: store_inline_imm_2.0_f16:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0x4000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x40,0x00,0x00]
; GFX11-FAKE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX11-FAKE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
; GFX11-FAKE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; VI-LABEL: store_inline_imm_2.0_f16:
; VI: ; %bb.0:
@ -374,15 +455,25 @@ define amdgpu_kernel void @store_inline_imm_m_2.0_f16(ptr addrspace(1) %out) {
; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; GFX11-LABEL: store_inline_imm_m_2.0_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
; GFX11-NEXT: v_mov_b32_e32 v0, 0xffffc000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xc0,0xff,0xff]
; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
; GFX11-TRUE16-LABEL: store_inline_imm_m_2.0_f16:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0xc000 ; encoding: [0xff,0x38,0x00,0x7e,0x00,0xc0,0xff,0xff]
; GFX11-TRUE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX11-TRUE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
; GFX11-TRUE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; GFX11-FAKE16-LABEL: store_inline_imm_m_2.0_f16:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0xffffc000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xc0,0xff,0xff]
; GFX11-FAKE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX11-FAKE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
; GFX11-FAKE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; VI-LABEL: store_inline_imm_m_2.0_f16:
; VI: ; %bb.0:
@ -418,15 +509,25 @@ define amdgpu_kernel void @store_inline_imm_4.0_f16(ptr addrspace(1) %out) {
; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; GFX11-LABEL: store_inline_imm_4.0_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
; GFX11-NEXT: v_mov_b32_e32 v0, 0x4400 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x44,0x00,0x00]
; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
; GFX11-TRUE16-LABEL: store_inline_imm_4.0_f16:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x4400 ; encoding: [0xff,0x38,0x00,0x7e,0x00,0x44,0x00,0x00]
; GFX11-TRUE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX11-TRUE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
; GFX11-TRUE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; GFX11-FAKE16-LABEL: store_inline_imm_4.0_f16:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0x4400 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x44,0x00,0x00]
; GFX11-FAKE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX11-FAKE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
; GFX11-FAKE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; VI-LABEL: store_inline_imm_4.0_f16:
; VI: ; %bb.0:
@ -462,15 +563,25 @@ define amdgpu_kernel void @store_inline_imm_m_4.0_f16(ptr addrspace(1) %out) {
; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; GFX11-LABEL: store_inline_imm_m_4.0_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
; GFX11-NEXT: v_mov_b32_e32 v0, 0xffffc400 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xc4,0xff,0xff]
; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
; GFX11-TRUE16-LABEL: store_inline_imm_m_4.0_f16:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0xc400 ; encoding: [0xff,0x38,0x00,0x7e,0x00,0xc4,0xff,0xff]
; GFX11-TRUE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX11-TRUE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
; GFX11-TRUE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; GFX11-FAKE16-LABEL: store_inline_imm_m_4.0_f16:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0xffffc400 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xc4,0xff,0xff]
; GFX11-FAKE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX11-FAKE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
; GFX11-FAKE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; VI-LABEL: store_inline_imm_m_4.0_f16:
; VI: ; %bb.0:
@ -506,15 +617,25 @@ define amdgpu_kernel void @store_inline_imm_inv_2pi_f16(ptr addrspace(1) %out) {
; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; GFX11-LABEL: store_inline_imm_inv_2pi_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
; GFX11-NEXT: v_mov_b32_e32 v0, 0x3118 ; encoding: [0xff,0x02,0x00,0x7e,0x18,0x31,0x00,0x00]
; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
; GFX11-TRUE16-LABEL: store_inline_imm_inv_2pi_f16:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x3118 ; encoding: [0xff,0x38,0x00,0x7e,0x18,0x31,0x00,0x00]
; GFX11-TRUE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX11-TRUE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
; GFX11-TRUE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; GFX11-FAKE16-LABEL: store_inline_imm_inv_2pi_f16:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0x3118 ; encoding: [0xff,0x02,0x00,0x7e,0x18,0x31,0x00,0x00]
; GFX11-FAKE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX11-FAKE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
; GFX11-FAKE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; VI-LABEL: store_inline_imm_inv_2pi_f16:
; VI: ; %bb.0:
@ -550,15 +671,25 @@ define amdgpu_kernel void @store_inline_imm_m_inv_2pi_f16(ptr addrspace(1) %out)
; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; GFX11-LABEL: store_inline_imm_m_inv_2pi_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
; GFX11-NEXT: v_mov_b32_e32 v0, 0xffffb118 ; encoding: [0xff,0x02,0x00,0x7e,0x18,0xb1,0xff,0xff]
; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
; GFX11-TRUE16-LABEL: store_inline_imm_m_inv_2pi_f16:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0xb118 ; encoding: [0xff,0x38,0x00,0x7e,0x18,0xb1,0xff,0xff]
; GFX11-TRUE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX11-TRUE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
; GFX11-TRUE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; GFX11-FAKE16-LABEL: store_inline_imm_m_inv_2pi_f16:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0xffffb118 ; encoding: [0xff,0x02,0x00,0x7e,0x18,0xb1,0xff,0xff]
; GFX11-FAKE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX11-FAKE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
; GFX11-FAKE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; VI-LABEL: store_inline_imm_m_inv_2pi_f16:
; VI: ; %bb.0:
@ -594,15 +725,25 @@ define amdgpu_kernel void @store_literal_imm_f16(ptr addrspace(1) %out) {
; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; GFX11-LABEL: store_literal_imm_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
; GFX11-NEXT: v_mov_b32_e32 v0, 0x6c00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x6c,0x00,0x00]
; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
; GFX11-TRUE16-LABEL: store_literal_imm_f16:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x6c00 ; encoding: [0xff,0x38,0x00,0x7e,0x00,0x6c,0x00,0x00]
; GFX11-TRUE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX11-TRUE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
; GFX11-TRUE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; GFX11-FAKE16-LABEL: store_literal_imm_f16:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0x6c00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x6c,0x00,0x00]
; GFX11-FAKE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX11-FAKE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
; GFX11-FAKE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; VI-LABEL: store_literal_imm_f16:
; VI: ; %bb.0:

View File

@ -358,14 +358,15 @@ main_body:
define amdgpu_ps half @v_interp_f16_imm_params(float inreg %i, float inreg %j) #0 {
; GFX11-TRUE16-LABEL: v_interp_f16_imm_params:
; GFX11-TRUE16: ; %bb.0: ; %main_body
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, s0 :: v_dual_mov_b32 v2, 0
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s0
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v3, s1
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_interp_p10_f16_f32 v1, v0.l, v1, v0.l wait_exp:7
; GFX11-TRUE16-NEXT: v_interp_p2_f16_f32 v0.l, v0.l, v3, v2 wait_exp:7
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, v1
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
; GFX11-TRUE16-NEXT: v_interp_p10_f16_f32 v2, v0.l, v2, v0.l wait_exp:7
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_interp_p2_f16_f32 v0.l, v0.l, v3, v1 wait_exp:7
; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, v2
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.h, v0.l
; GFX11-TRUE16-NEXT: ; return to shader part epilog
;
@ -383,14 +384,15 @@ define amdgpu_ps half @v_interp_f16_imm_params(float inreg %i, float inreg %j) #
;
; GFX12-TRUE16-LABEL: v_interp_f16_imm_params:
; GFX12-TRUE16: ; %bb.0: ; %main_body
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0
; GFX12-TRUE16-NEXT: v_dual_mov_b32 v1, s0 :: v_dual_mov_b32 v2, 0
; GFX12-TRUE16-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s0
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v3, s1
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_interp_p10_f16_f32 v1, v0.l, v1, v0.l wait_exp:7
; GFX12-TRUE16-NEXT: v_interp_p2_f16_f32 v0.l, v0.l, v3, v2 wait_exp:7
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, v1
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
; GFX12-TRUE16-NEXT: v_interp_p10_f16_f32 v2, v0.l, v2, v0.l wait_exp:7
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_interp_p2_f16_f32 v0.l, v0.l, v3, v1 wait_exp:7
; GFX12-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, v2
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.h, v0.l
; GFX12-TRUE16-NEXT: ; return to shader part epilog
;