[AMDGPU][True16][CodeGen] Support AND/OR/XOR and LDEXP True16 format (#102620)

Support AND/OR/XOR true16 and LDEXP true/fake16 format.

These instructions were previously implemented with the fake16 profile;
this change fixes the implementation.

Added a register allocation (RA) hint so that, when a 16-bit register is
used in a 32-bit instruction, the allocator tries to use the register
directly without an extra 16-bit move.

---------

Co-authored-by: guochen2 <guochen2@amd.com>
This commit is contained in:
Brox Chen 2024-08-13 12:23:39 -04:00 committed by GitHub
parent 248e885235
commit afd42fb303
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
14 changed files with 1836 additions and 817 deletions

View File

@ -161,18 +161,34 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
// TODO: Skip masking high bits if def is known boolean.
bool IsSGPR = TRI.isSGPRClass(SrcRC);
unsigned AndOpc =
IsSGPR ? AMDGPU::S_AND_B32 : AMDGPU::V_AND_B32_e32;
auto And = BuildMI(*BB, &I, DL, TII.get(AndOpc), MaskedReg)
.addImm(1)
.addReg(SrcReg);
if (IsSGPR)
And.setOperandDead(3); // Dead scc
if (AMDGPU::getRegBitWidth(SrcRC->getID()) == 16) {
assert(Subtarget->useRealTrue16Insts());
const int64_t NoMods = 0;
BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_AND_B16_t16_e64), MaskedReg)
.addImm(NoMods)
.addImm(1)
.addImm(NoMods)
.addReg(SrcReg)
.addImm(NoMods);
BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U16_t16_e64), DstReg)
.addImm(NoMods)
.addImm(0)
.addImm(NoMods)
.addReg(MaskedReg)
.addImm(NoMods);
} else {
bool IsSGPR = TRI.isSGPRClass(SrcRC);
unsigned AndOpc = IsSGPR ? AMDGPU::S_AND_B32 : AMDGPU::V_AND_B32_e32;
auto And = BuildMI(*BB, &I, DL, TII.get(AndOpc), MaskedReg)
.addImm(1)
.addReg(SrcReg);
if (IsSGPR)
And.setOperandDead(3); // Dead scc
BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
.addImm(0)
.addReg(MaskedReg);
BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
.addImm(0)
.addReg(MaskedReg);
}
}
if (!MRI->getRegClassOrNull(SrcReg))
@ -2206,6 +2222,16 @@ bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
return false;
}
if (DstRC == &AMDGPU::VGPR_16RegClass && SrcSize == 32) {
assert(STI.useRealTrue16Insts());
const DebugLoc &DL = I.getDebugLoc();
MachineBasicBlock *MBB = I.getParent();
BuildMI(*MBB, I, DL, TII.get(AMDGPU::COPY), DstReg)
.addReg(SrcReg, 0, AMDGPU::lo16);
I.eraseFromParent();
return true;
}
if (DstTy == LLT::fixed_vector(2, 16) && SrcTy == LLT::fixed_vector(2, 32)) {
MachineBasicBlock *MBB = I.getParent();
const DebugLoc &DL = I.getDebugLoc();

View File

@ -2030,6 +2030,8 @@ def : GCNPat <
>;
foreach fp16vt = [f16, bf16] in {
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
let SubtargetPredicate = p in {
def : GCNPat <
(fabs (fp16vt VGPR_32:$src)),
(V_AND_B32_e64 (S_MOV_B32 (i32 0x00007fff)), VGPR_32:$src)
@ -2044,6 +2046,24 @@ def : GCNPat <
(fneg (fabs (fp16vt VGPR_32:$src))),
(V_OR_B32_e64 (S_MOV_B32 (i32 0x00008000)), VGPR_32:$src) // Set sign bit
>;
}
// Sign-bit manipulation patterns used when the subtarget predicate
// UseRealTrue16Insts holds. The source operand is a VGPR_16, so the sign bit
// (0x8000) of the 16-bit value is changed with the 16-bit e64 bitwise
// instructions.
// NOTE(review): the (i32 0) operands are presumably the "no modifiers" fields
// that precede each source operand of the *_t16_e64 instructions -- confirm
// against the instruction's operand list.
let SubtargetPredicate = UseRealTrue16Insts in {
// fabs: AND with 0x7fff clears the sign bit.
def : GCNPat <
(fabs (fp16vt VGPR_16:$src)),
(V_AND_B16_t16_e64 (i32 0), (i16 0x7fff), (i32 0), VGPR_16:$src)
>;
// fneg: XOR with 0x8000 flips the sign bit.
def : GCNPat <
(fneg (fp16vt VGPR_16:$src)),
(V_XOR_B16_t16_e64 (i32 0), (i16 0x8000), (i32 0), VGPR_16:$src)
>;
// fneg(fabs(x)): OR with 0x8000 forces the sign bit on in a single instruction.
def : GCNPat <
(fneg (fabs (fp16vt VGPR_16:$src))),
(V_OR_B16_t16_e64 (i32 0), (i16 0x8000), (i32 0), VGPR_16:$src) // Set sign bit
>;
} // End SubtargetPredicate = UseRealTrue16Insts
} // End foreach fp16vt = ...
def : GCNPat <

View File

@ -152,6 +152,10 @@ bool SIShrinkInstructions::shouldShrinkTrue16(MachineInstr &MI) const {
if (AMDGPU::VGPR_32RegClass.contains(Reg) &&
!AMDGPU::VGPR_32_Lo128RegClass.contains(Reg))
return false;
if (AMDGPU::VGPR_16RegClass.contains(Reg) &&
!AMDGPU::VGPR_16_Lo128RegClass.contains(Reg))
return false;
}
}
return true;

View File

@ -1397,7 +1397,8 @@ def : GCNPat <
} // End OtherPredicates = [isGFX8Plus]
let OtherPredicates = [isGFX8Plus] in {
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
let OtherPredicates = [isGFX8Plus, p] in {
def : GCNPat<
(i32 (anyext i16:$src)),
(COPY $src)
@ -1420,7 +1421,43 @@ def : GCNPat <
(EXTRACT_SUBREG $src, sub0)
>;
} // End OtherPredicates = [isGFX8Plus]
} // End OtherPredicates = [isGFX8Plus, p]
// Under UseFakeTrue16Insts, a divergent any-extend from i16 to i32 is selected
// as a plain COPY of the source.
let OtherPredicates = [UseFakeTrue16Insts] in {
def : GCNPat<
(i32 (DivergentUnaryFrag<anyext> i16:$src)),
(COPY $src)
>;
} // End OtherPredicates = [UseFakeTrue16Insts]
// any-extend / truncate patterns for i16 used when UseRealTrue16Insts holds.
// Widening builds the wider register with REG_SEQUENCE, and narrowing extracts
// the lo16 subregister.
let OtherPredicates = [UseRealTrue16Insts] in {
// Uniform i16 -> i32: the source is an SReg_32, so a COPY is enough.
def : GCNPat<
(i32 (UniformUnaryFrag<anyext> (i16 SReg_32:$src))),
(COPY $src)
>;
// Divergent i16 -> i32: place $src in lo16 of a VGPR_32; hi16 is IMPLICIT_DEF
// (any-extend leaves the upper bits unspecified).
def : GCNPat<
(i32 (DivergentUnaryFrag<anyext> i16:$src)),
(REG_SEQUENCE VGPR_32, $src, lo16, (i16 (IMPLICIT_DEF)), hi16)
>;
// i16 -> i64: $src goes in lo16 of a VReg_64; hi16 and sub1 are IMPLICIT_DEF.
def : GCNPat<
(i64 (anyext i16:$src)),
(REG_SEQUENCE VReg_64, $src, lo16, (i16 (IMPLICIT_DEF)), hi16, (i32 (IMPLICIT_DEF)), sub1)
>;
// i32 -> i16 truncate: take the lo16 subregister.
def : GCNPat<
(i16 (trunc i32:$src)),
(EXTRACT_SUBREG $src, lo16)
>;
// i64 -> i16 truncate: likewise take the lo16 subregister.
def : GCNPat <
(i16 (trunc i64:$src)),
(EXTRACT_SUBREG $src, lo16)
>;
} // End OtherPredicates = [UseRealTrue16Insts]
//===----------------------------------------------------------------------===//
// GFX9

View File

@ -922,18 +922,25 @@ def LDEXP_F16_VOPProfile : VOPProfile <[f16, f16, f16, untyped]> {
let HasSrc1FloatMods = 0;
let Src1ModSDWA = Int16SDWAInputMods;
}
def LDEXP_F16_VOPProfile_True16 : VOPProfile_Fake16<VOP_F16_F16_F16> {
def LDEXP_F16_VOPProfile_True16 : VOPProfile_True16<VOP_F16_F16_F16> {
let Src1RC32 = RegisterOperand<VGPR_16_Lo128>;
let Src1DPP = RegisterOperand<VGPR_16_Lo128>;
let Src1ModDPP = IntT16VRegInputMods<0/*IsFake16*/>;
}
def LDEXP_F16_VOPProfile_Fake16 : VOPProfile_Fake16<VOP_F16_F16_F16> {
let Src1RC32 = RegisterOperand<VGPR_32_Lo128>;
let Src1DPP = RegisterOperand<VGPR_32_Lo128>;
let Src1ModDPP = IntT16VRegInputMods</* IsFake16= */ 1>;
let Src1ModDPP = IntT16VRegInputMods<1/*IsFake16*/>;
}
let isReMaterializable = 1 in {
let FPDPRounding = 1 in {
let OtherPredicates = [Has16BitInsts], True16Predicate = NotHasTrue16BitInsts in
defm V_LDEXP_F16 : VOP2Inst <"v_ldexp_f16", LDEXP_F16_VOPProfile>;
let SubtargetPredicate = HasTrue16BitInsts in
let SubtargetPredicate = UseRealTrue16Insts in
defm V_LDEXP_F16_t16 : VOP2Inst <"v_ldexp_f16_t16", LDEXP_F16_VOPProfile_True16>;
let SubtargetPredicate = UseFakeTrue16Insts in
defm V_LDEXP_F16_fake16 : VOP2Inst <"v_ldexp_f16_fake16", LDEXP_F16_VOPProfile_Fake16, null_frag, "v_ldexp_f16_fake16">;
} // End FPDPRounding = 1
// FIXME VOP3 Only instructions. NFC using VOPProfile_True16 for these until a planned change to use a new register class for VOP3 encoded True16 instructions
defm V_LSHLREV_B16 : VOP2Inst_e64_t16 <"v_lshlrev_b16", VOP_I16_I16_I16, clshl_rev_16>;
@ -968,14 +975,30 @@ class LDEXP_F16_Pat <SDPatternOperator op, VOP_Pseudo inst, VOPProfile P = inst.
let OtherPredicates = [NotHasTrue16BitInsts] in
def : LDEXP_F16_Pat<any_fldexp, V_LDEXP_F16_e64>;
let OtherPredicates = [HasTrue16BitInsts] in
def : LDEXP_F16_Pat<any_fldexp, V_LDEXP_F16_t16_e64>;
// Selection pattern mapping operator `op` onto instruction `inst`, whose
// profile P defaults to inst.Pfl.
//  - src0 is matched through VOP3Mods0, which yields its source modifiers plus
//    the clamp and omod operands.
//  - src1 is matched as an i16 through VOP3Mods0, yielding only its source
//    modifiers.
// The emitted instruction always gets 0 for its final (op_sel) operand.
class LDEXP_F16_t16_Pat <SDPatternOperator op, VOP_Pseudo inst, VOPProfile P = inst.Pfl> : GCNPat <
(P.DstVT (op (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)),
(i16 (VOP3Mods0 P.Src1VT:$src1, i32:$src1_modifiers)))),
(inst $src0_modifiers, $src0,
$src1_modifiers, $src1,
$clamp, /* clamp */
$omod, /* omod */
0) /* op_sel */
>;
let OtherPredicates = [UseRealTrue16Insts] in
def : LDEXP_F16_t16_Pat<any_fldexp, V_LDEXP_F16_t16_e64>;
let OtherPredicates = [UseFakeTrue16Insts] in
def : LDEXP_F16_Pat<any_fldexp, V_LDEXP_F16_fake16_e64>;
let SubtargetPredicate = isGFX11Plus in {
let isCommutable = 1 in {
defm V_AND_B16_t16 : VOP2Inst_e64 <"v_and_b16_t16", VOPProfile_Fake16<VOP_I16_I16_I16>, and>;
defm V_OR_B16_t16 : VOP2Inst_e64 <"v_or_b16_t16", VOPProfile_Fake16<VOP_I16_I16_I16>, or>;
defm V_XOR_B16_t16 : VOP2Inst_e64 <"v_xor_b16_t16", VOPProfile_Fake16<VOP_I16_I16_I16>, xor>;
defm V_AND_B16_t16 : VOP2Inst_e64 <"v_and_b16_t16", VOPProfile_True16<VOP_I16_I16_I16>, and>;
defm V_AND_B16_fake16 : VOP2Inst_e64 <"v_and_b16_fake16", VOPProfile_Fake16<VOP_I16_I16_I16>, and>;
defm V_OR_B16_t16 : VOP2Inst_e64 <"v_or_b16_t16", VOPProfile_True16<VOP_I16_I16_I16>, or>;
defm V_OR_B16_fake16 : VOP2Inst_e64 <"v_or_b16_fake16", VOPProfile_Fake16<VOP_I16_I16_I16>, or>;
defm V_XOR_B16_t16 : VOP2Inst_e64 <"v_xor_b16_t16", VOPProfile_True16<VOP_I16_I16_I16>, xor>;
defm V_XOR_B16_fake16 : VOP2Inst_e64 <"v_xor_b16_fake16", VOPProfile_Fake16<VOP_I16_I16_I16>, xor>;
} // End isCommutable = 1
} // End SubtargetPredicate = isGFX11Plus
@ -1714,6 +1737,7 @@ defm V_MUL_F16_t16 : VOP2_Real_FULL_t16_gfx11_gfx12<0x035, "v_mul_f16">;
defm V_MUL_F16_fake16 : VOP2_Real_FULL_t16_gfx11_gfx12<0x035, "v_mul_f16">;
defm V_FMAC_F16_t16 : VOP2_Real_FULL_t16_gfx11_gfx12<0x036, "v_fmac_f16">;
defm V_LDEXP_F16_t16 : VOP2_Real_FULL_t16_gfx11_gfx12<0x03b, "v_ldexp_f16">;
defm V_LDEXP_F16_fake16 : VOP2_Real_FULL_t16_gfx11_gfx12<0x03b, "v_ldexp_f16">;
defm V_MAX_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x039, "v_max_f16">;
defm V_MAX_F16_fake16 : VOP2_Real_FULL_t16_gfx11<0x039, "v_max_f16">;
defm V_MIN_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x03a, "v_min_f16">;

View File

@ -1227,8 +1227,11 @@ let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in)
defm V_WRITELANE_B32 : VOP3_Real_No_Suffix_gfx11_gfx12<0x361>; // Pseudo in VOP2
} // End InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in)
defm V_AND_B16_t16 : VOP3Only_Realtriple_t16_gfx11_gfx12<0x362, "v_and_b16">;
defm V_AND_B16_fake16 : VOP3Only_Realtriple_t16_gfx11_gfx12<0x362, "v_and_b16">;
defm V_OR_B16_t16 : VOP3Only_Realtriple_t16_gfx11_gfx12<0x363, "v_or_b16">;
defm V_OR_B16_fake16 : VOP3Only_Realtriple_t16_gfx11_gfx12<0x363, "v_or_b16">;
defm V_XOR_B16_t16 : VOP3Only_Realtriple_t16_gfx11_gfx12<0x364, "v_xor_b16">;
defm V_XOR_B16_fake16 : VOP3Only_Realtriple_t16_gfx11_gfx12<0x364, "v_xor_b16">;
//===----------------------------------------------------------------------===//
// GFX10.

View File

@ -49,10 +49,11 @@ body: |
; GFX11: liveins: $vgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[COPY]]
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
; GFX11-NEXT: [[V_CEIL_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_CEIL_F16_t16_e64 0, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CEIL_F16_t16_e64_]]
; GFX11-NEXT: $vgpr0 = COPY [[COPY2]]
; GFX11-NEXT: [[DEF:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_CEIL_F16_t16_e64_]], %subreg.lo16, [[DEF]], %subreg.hi16
; GFX11-NEXT: $vgpr0 = COPY [[REG_SEQUENCE]]
;
; GFX11-FAKE16-LABEL: name: fceil_s16_vv
; GFX11-FAKE16: liveins: $vgpr0
@ -89,8 +90,9 @@ body: |
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX11-NEXT: [[V_CEIL_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_CEIL_F16_t16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[V_CEIL_F16_t16_e64_]]
; GFX11-NEXT: $vgpr0 = COPY [[COPY1]]
; GFX11-NEXT: [[DEF:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_CEIL_F16_t16_e64_]], %subreg.lo16, [[DEF]], %subreg.hi16
; GFX11-NEXT: $vgpr0 = COPY [[REG_SEQUENCE]]
;
; GFX11-FAKE16-LABEL: name: fceil_s16_vs
; GFX11-FAKE16: liveins: $sgpr0
@ -126,10 +128,11 @@ body: |
; GFX11: liveins: $vgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[COPY]]
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
; GFX11-NEXT: [[V_CEIL_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_CEIL_F16_t16_e64 1, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CEIL_F16_t16_e64_]]
; GFX11-NEXT: $vgpr0 = COPY [[COPY2]]
; GFX11-NEXT: [[DEF:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_CEIL_F16_t16_e64_]], %subreg.lo16, [[DEF]], %subreg.hi16
; GFX11-NEXT: $vgpr0 = COPY [[REG_SEQUENCE]]
;
; GFX11-FAKE16-LABEL: name: fceil_fneg_s16_vv
; GFX11-FAKE16: liveins: $vgpr0

View File

@ -58,10 +58,11 @@ body: |
; GFX11: liveins: $vgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[COPY]]
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
; GFX11-NEXT: [[V_FLOOR_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_FLOOR_F16_t16_e64 0, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_FLOOR_F16_t16_e64_]]
; GFX11-NEXT: $vgpr0 = COPY [[COPY2]]
; GFX11-NEXT: [[DEF:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_FLOOR_F16_t16_e64_]], %subreg.lo16, [[DEF]], %subreg.hi16
; GFX11-NEXT: $vgpr0 = COPY [[REG_SEQUENCE]]
;
; GFX11-FAKE16-LABEL: name: ffloor_s16_vv
; GFX11-FAKE16: liveins: $vgpr0
@ -98,8 +99,9 @@ body: |
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX11-NEXT: [[V_FLOOR_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_FLOOR_F16_t16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[V_FLOOR_F16_t16_e64_]]
; GFX11-NEXT: $vgpr0 = COPY [[COPY1]]
; GFX11-NEXT: [[DEF:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_FLOOR_F16_t16_e64_]], %subreg.lo16, [[DEF]], %subreg.hi16
; GFX11-NEXT: $vgpr0 = COPY [[REG_SEQUENCE]]
;
; GFX11-FAKE16-LABEL: name: ffloor_s16_vs
; GFX11-FAKE16: liveins: $sgpr0
@ -135,10 +137,11 @@ body: |
; GFX11: liveins: $vgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[COPY]]
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
; GFX11-NEXT: [[V_FLOOR_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_FLOOR_F16_t16_e64 1, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_FLOOR_F16_t16_e64_]]
; GFX11-NEXT: $vgpr0 = COPY [[COPY2]]
; GFX11-NEXT: [[DEF:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_FLOOR_F16_t16_e64_]], %subreg.lo16, [[DEF]], %subreg.hi16
; GFX11-NEXT: $vgpr0 = COPY [[REG_SEQUENCE]]
;
; GFX11-FAKE16-LABEL: name: ffloor_fneg_s16_vv
; GFX11-FAKE16: liveins: $vgpr0

File diff suppressed because it is too large Load Diff

View File

@ -100,9 +100,7 @@ define amdgpu_kernel void @fadd_f16(
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT: buffer_load_u16 v1, off, s[0:3], 0 glc dlc
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT: v_mov_b16_e32 v0.h, v1.l
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_add_f16_e32 v0.l, v0.l, v0.h
; GFX11-GISEL-NEXT: v_add_f16_e32 v0.l, v0.l, v1.l
; GFX11-GISEL-NEXT: buffer_store_b16 v0, off, s[4:7], 0
; GFX11-GISEL-NEXT: s_nop 0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)

View File

@ -165,12 +165,10 @@ define amdgpu_kernel void @ceil_v2f16(
; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX11-NEXT: v_ceil_f16_e32 v0.l, v0.l
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_mov_b16_e32 v0.h, v1.l
; GFX11-NEXT: v_ceil_f16_e32 v0.h, v1.l
; GFX11-NEXT: v_mov_b16_e32 v1.l, v0.l
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_ceil_f16_e32 v0.h, v0.h
; GFX11-NEXT: v_mov_b16_e32 v0.l, v0.h
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_pack_b32_f16 v0, v1, v0
; GFX11-NEXT: buffer_store_b32 v0, off, s[4:7], 0
; GFX11-NEXT: s_nop 0

View File

@ -166,12 +166,10 @@ define amdgpu_kernel void @floor_v2f16(
; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX11-NEXT: v_floor_f16_e32 v0.l, v0.l
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_mov_b16_e32 v0.h, v1.l
; GFX11-NEXT: v_floor_f16_e32 v0.h, v1.l
; GFX11-NEXT: v_mov_b16_e32 v1.l, v0.l
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_floor_f16_e32 v0.h, v0.h
; GFX11-NEXT: v_mov_b16_e32 v0.l, v0.h
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_pack_b32_f16 v0, v1, v0
; GFX11-NEXT: buffer_store_b32 v0, off, s[4:7], 0
; GFX11-NEXT: s_nop 0

View File

@ -2,12 +2,14 @@
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6,GFX6-SDAG %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-SDAG %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-SDAG %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-SDAG %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-SDAG-TRUE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-SDAG-FAKE16 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6,GFX6-GISEL %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-GISEL %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-GISEL %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-GISEL %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-GISEL-TRUE16 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-GISEL-FAKE16 %s
define float @test_ldexp_f32_i32(ptr addrspace(1) %out, float %a, i32 %b) {
; GFX6-LABEL: test_ldexp_f32_i32:
@ -211,13 +213,22 @@ define half @test_ldexp_f16_i8(half %a, i8 %b) {
; GFX9-SDAG-NEXT: v_ldexp_f16_sdwa v0, v0, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: test_ldexp_f16_i8:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
; GFX11-SDAG-TRUE16-LABEL: test_ldexp_f16_i8:
; GFX11-SDAG-TRUE16: ; %bb.0:
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 8
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v0.h
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-FAKE16-LABEL: test_ldexp_f16_i8:
; GFX11-SDAG-FAKE16: ; %bb.0:
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-FAKE16-NEXT: v_ldexp_f16_e32 v0, v0, v1
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX6-GISEL-LABEL: test_ldexp_f16_i8:
; GFX6-GISEL: ; %bb.0:
@ -248,15 +259,25 @@ define half @test_ldexp_f16_i8(half %a, i8 %b) {
; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: test_ldexp_f16_i8:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: v_bfe_i32 v1, v1, 0, 8
; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0x7fff
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v2
; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
; GFX11-GISEL-TRUE16-LABEL: test_ldexp_f16_i8:
; GFX11-GISEL-TRUE16: ; %bb.0:
; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 8
; GFX11-GISEL-TRUE16-NEXT: v_mov_b32_e32 v2, 0x7fff
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-GISEL-TRUE16-NEXT: v_med3_i32 v1, 0xffff8000, v1, v2
; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v1.l
; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-GISEL-FAKE16-LABEL: test_ldexp_f16_i8:
; GFX11-GISEL-FAKE16: ; %bb.0:
; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8
; GFX11-GISEL-FAKE16-NEXT: v_mov_b32_e32 v2, 0x7fff
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-GISEL-FAKE16-NEXT: v_med3_i32 v1, 0xffff8000, v1, v2
; GFX11-GISEL-FAKE16-NEXT: v_ldexp_f16_e32 v0, v0, v1
; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
%result = call half @llvm.ldexp.f16.i8(half %a, i8 %b)
ret half %result
}
@ -283,11 +304,19 @@ define half @test_ldexp_f16_i16(half %a, i16 %b) {
; GFX9-NEXT: v_ldexp_f16_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: test_ldexp_f16_i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_ldexp_f16_e32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SDAG-TRUE16-LABEL: test_ldexp_f16_i16:
; GFX11-SDAG-TRUE16: ; %bb.0:
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v0.h
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-FAKE16-LABEL: test_ldexp_f16_i16:
; GFX11-SDAG-FAKE16: ; %bb.0:
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-FAKE16-NEXT: v_ldexp_f16_e32 v0, v0, v1
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX6-GISEL-LABEL: test_ldexp_f16_i16:
; GFX6-GISEL: ; %bb.0:
@ -297,6 +326,18 @@ define half @test_ldexp_f16_i16(half %a, i16 %b) {
; GFX6-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
; GFX6-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-GISEL-TRUE16-LABEL: test_ldexp_f16_i16:
; GFX11-GISEL-TRUE16: ; %bb.0:
; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v1.l
; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-GISEL-FAKE16-LABEL: test_ldexp_f16_i16:
; GFX11-GISEL-FAKE16: ; %bb.0:
; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-FAKE16-NEXT: v_ldexp_f16_e32 v0, v0, v1
; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
%result = call half @llvm.ldexp.f16.i16(half %a, i16 %b)
ret half %result
}
@ -328,14 +369,23 @@ define half @test_ldexp_f16_i32(half %a, i32 %b) {
; GFX9-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: test_ldexp_f16_i32:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT: s_movk_i32 s0, 0x8000
; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-NEXT: v_med3_i32 v1, v1, s0, 0x7fff
; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
; GFX11-SDAG-TRUE16-LABEL: test_ldexp_f16_i32:
; GFX11-SDAG-TRUE16: ; %bb.0:
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-TRUE16-NEXT: s_movk_i32 s0, 0x8000
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-TRUE16-NEXT: v_med3_i32 v1, v1, s0, 0x7fff
; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v1.l
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-FAKE16-LABEL: test_ldexp_f16_i32:
; GFX11-SDAG-FAKE16: ; %bb.0:
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-FAKE16-NEXT: s_movk_i32 s0, 0x8000
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-FAKE16-NEXT: v_med3_i32 v1, v1, s0, 0x7fff
; GFX11-SDAG-FAKE16-NEXT: v_ldexp_f16_e32 v0, v0, v1
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX6-GISEL-LABEL: test_ldexp_f16_i32:
; GFX6-GISEL: ; %bb.0:
@ -363,14 +413,23 @@ define half @test_ldexp_f16_i32(half %a, i32 %b) {
; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: test_ldexp_f16_i32:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0x7fff
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v2
; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
; GFX11-GISEL-TRUE16-LABEL: test_ldexp_f16_i32:
; GFX11-GISEL-TRUE16: ; %bb.0:
; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-TRUE16-NEXT: v_mov_b32_e32 v2, 0x7fff
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-GISEL-TRUE16-NEXT: v_med3_i32 v1, 0xffff8000, v1, v2
; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v1.l
; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-GISEL-FAKE16-LABEL: test_ldexp_f16_i32:
; GFX11-GISEL-FAKE16: ; %bb.0:
; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-FAKE16-NEXT: v_mov_b32_e32 v2, 0x7fff
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-GISEL-FAKE16-NEXT: v_med3_i32 v1, 0xffff8000, v1, v2
; GFX11-GISEL-FAKE16-NEXT: v_ldexp_f16_e32 v0, v0, v1
; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
%result = call half @llvm.ldexp.f16.i32(half %a, i32 %b)
ret half %result
}
@ -411,19 +470,36 @@ define <2 x half> @test_ldexp_v2f16_v2i32(<2 x half> %a, <2 x i32> %b) {
; GFX9-SDAG-NEXT: v_pack_b32_f16 v0, v0, v2
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: test_ldexp_v2f16_v2i32:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT: s_movk_i32 s0, 0x8000
; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; GFX11-SDAG-NEXT: v_med3_i32 v2, v2, s0, 0x7fff
; GFX11-SDAG-NEXT: v_med3_i32 v1, v1, s0, 0x7fff
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v2, v3, v2
; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT: v_pack_b32_f16 v0, v0, v2
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
; GFX11-SDAG-TRUE16-LABEL: test_ldexp_v2f16_v2i32:
; GFX11-SDAG-TRUE16: ; %bb.0:
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-TRUE16-NEXT: s_movk_i32 s0, 0x8000
; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; GFX11-SDAG-TRUE16-NEXT: v_med3_i32 v2, v2, s0, 0x7fff
; GFX11-SDAG-TRUE16-NEXT: v_med3_i32 v1, v1, s0, 0x7fff
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.h, v3.l, v2.l
; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v1.l
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v1, v0
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-FAKE16-LABEL: test_ldexp_v2f16_v2i32:
; GFX11-SDAG-FAKE16: ; %bb.0:
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-FAKE16-NEXT: s_movk_i32 s0, 0x8000
; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; GFX11-SDAG-FAKE16-NEXT: v_med3_i32 v2, v2, s0, 0x7fff
; GFX11-SDAG-FAKE16-NEXT: v_med3_i32 v1, v1, s0, 0x7fff
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-FAKE16-NEXT: v_ldexp_f16_e32 v2, v3, v2
; GFX11-SDAG-FAKE16-NEXT: v_ldexp_f16_e32 v0, v0, v1
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v2
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX6-GISEL-LABEL: test_ldexp_v2f16_v2i32:
; GFX6-GISEL: ; %bb.0:
@ -460,21 +536,40 @@ define <2 x half> @test_ldexp_v2f16_v2i32(<2 x half> %a, <2 x i32> %b) {
; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v1
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: test_ldexp_v2f16_v2i32:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fff
; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v3
; GFX11-GISEL-NEXT: v_med3_i32 v2, 0xffff8000, v2, v3
; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v1, v4, v2
; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
; GFX11-GISEL-TRUE16-LABEL: test_ldexp_v2f16_v2i32:
; GFX11-GISEL-TRUE16: ; %bb.0:
; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-TRUE16-NEXT: v_mov_b32_e32 v3, 0x7fff
; GFX11-GISEL-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 16, v0
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-GISEL-TRUE16-NEXT: v_med3_i32 v1, 0xffff8000, v1, v3
; GFX11-GISEL-TRUE16-NEXT: v_med3_i32 v2, 0xffff8000, v2, v3
; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v1.l
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v0.h, v4.l, v2.l
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
; GFX11-GISEL-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-TRUE16-NEXT: v_lshl_or_b32 v0, v0, 16, v1
; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-GISEL-FAKE16-LABEL: test_ldexp_v2f16_v2i32:
; GFX11-GISEL-FAKE16: ; %bb.0:
; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-FAKE16-NEXT: v_mov_b32_e32 v3, 0x7fff
; GFX11-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v0
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-GISEL-FAKE16-NEXT: v_med3_i32 v1, 0xffff8000, v1, v3
; GFX11-GISEL-FAKE16-NEXT: v_med3_i32 v2, 0xffff8000, v2, v3
; GFX11-GISEL-FAKE16-NEXT: v_ldexp_f16_e32 v0, v0, v1
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-GISEL-FAKE16-NEXT: v_ldexp_f16_e32 v1, v4, v2
; GFX11-GISEL-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-FAKE16-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
%result = call <2 x half> @llvm.ldexp.v2f16.v2i32(<2 x half> %a, <2 x i32> %b)
ret <2 x half> %result
}
@ -509,16 +604,30 @@ define <2 x half> @test_ldexp_v2f16_v2i16(<2 x half> %a, <2 x i16> %b) {
; GFX9-SDAG-NEXT: v_pack_b32_f16 v0, v0, v2
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: test_ldexp_v2f16_v2i16:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v2, 16, v1
; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v2, v3, v2
; GFX11-SDAG-NEXT: v_pack_b32_f16 v0, v0, v2
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
; GFX11-SDAG-TRUE16-LABEL: test_ldexp_v2f16_v2i16:
; GFX11-SDAG-TRUE16: ; %bb.0:
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1
; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v1.l
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.h, v3.l, v2.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v1, v0
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-FAKE16-LABEL: test_ldexp_v2f16_v2i16:
; GFX11-SDAG-FAKE16: ; %bb.0:
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1
; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; GFX11-SDAG-FAKE16-NEXT: v_ldexp_f16_e32 v0, v0, v1
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-FAKE16-NEXT: v_ldexp_f16_e32 v2, v3, v2
; GFX11-SDAG-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v2
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX6-GISEL-LABEL: test_ldexp_v2f16_v2i16:
; GFX6-GISEL: ; %bb.0:
@ -549,18 +658,34 @@ define <2 x half> @test_ldexp_v2f16_v2i16(<2 x half> %a, <2 x i16> %b) {
; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v2
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: test_ldexp_v2f16_v2i16:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v1
; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v1, v2, v3
; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
; GFX11-GISEL-TRUE16-LABEL: test_ldexp_v2f16_v2i16:
; GFX11-GISEL-TRUE16: ; %bb.0:
; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX11-GISEL-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 16, v1
; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v1.l
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v0.h, v2.l, v3.l
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
; GFX11-GISEL-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-TRUE16-NEXT: v_lshl_or_b32 v0, v0, 16, v1
; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-GISEL-FAKE16-LABEL: test_ldexp_v2f16_v2i16:
; GFX11-GISEL-FAKE16: ; %bb.0:
; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX11-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v1
; GFX11-GISEL-FAKE16-NEXT: v_ldexp_f16_e32 v0, v0, v1
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-GISEL-FAKE16-NEXT: v_ldexp_f16_e32 v1, v2, v3
; GFX11-GISEL-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-FAKE16-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
%result = call <2 x half> @llvm.ldexp.v2f16.v2i16(<2 x half> %a, <2 x i16> %b)
ret <2 x half> %result
}
@ -608,21 +733,40 @@ define <3 x half> @test_ldexp_v3f16_v3i32(<3 x half> %a, <3 x i32> %b) {
; GFX9-SDAG-NEXT: v_pack_b32_f16 v0, v0, v3
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: test_ldexp_v3f16_v3i32:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT: s_movk_i32 s0, 0x8000
; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v5, 16, v0
; GFX11-SDAG-NEXT: v_med3_i32 v3, v3, s0, 0x7fff
; GFX11-SDAG-NEXT: v_med3_i32 v2, v2, s0, 0x7fff
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v3, v5, v3
; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v2
; GFX11-SDAG-NEXT: v_med3_i32 v2, v4, s0, 0x7fff
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-NEXT: v_pack_b32_f16 v0, v0, v3
; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v1, v1, v2
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
; GFX11-SDAG-TRUE16-LABEL: test_ldexp_v3f16_v3i32:
; GFX11-SDAG-TRUE16: ; %bb.0:
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-TRUE16-NEXT: s_movk_i32 s0, 0x8000
; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v5, 16, v0
; GFX11-SDAG-TRUE16-NEXT: v_med3_i32 v3, v3, s0, 0x7fff
; GFX11-SDAG-TRUE16-NEXT: v_med3_i32 v2, v2, s0, 0x7fff
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.h, v5.l, v3.l
; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v2.l
; GFX11-SDAG-TRUE16-NEXT: v_med3_i32 v2, v4, s0, 0x7fff
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v1.l, v1.l, v2.l
; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v3, v0
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-FAKE16-LABEL: test_ldexp_v3f16_v3i32:
; GFX11-SDAG-FAKE16: ; %bb.0:
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-FAKE16-NEXT: s_movk_i32 s0, 0x8000
; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v0
; GFX11-SDAG-FAKE16-NEXT: v_med3_i32 v3, v3, s0, 0x7fff
; GFX11-SDAG-FAKE16-NEXT: v_med3_i32 v2, v2, s0, 0x7fff
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-FAKE16-NEXT: v_ldexp_f16_e32 v3, v5, v3
; GFX11-SDAG-FAKE16-NEXT: v_ldexp_f16_e32 v0, v0, v2
; GFX11-SDAG-FAKE16-NEXT: v_med3_i32 v2, v4, s0, 0x7fff
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v3
; GFX11-SDAG-FAKE16-NEXT: v_ldexp_f16_e32 v1, v1, v2
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX6-GISEL-LABEL: test_ldexp_v3f16_v3i32:
; GFX6-GISEL: ; %bb.0:
@ -666,23 +810,44 @@ define <3 x half> @test_ldexp_v3f16_v3i32(<3 x half> %a, <3 x i32> %b) {
; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v2
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: test_ldexp_v3f16_v3i32:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: v_mov_b32_e32 v5, 0x7fff
; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v6, 16, v0
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-GISEL-NEXT: v_med3_i32 v2, 0xffff8000, v2, v5
; GFX11-GISEL-NEXT: v_med3_i32 v3, 0xffff8000, v3, v5
; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v2
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v2, v6, v3
; GFX11-GISEL-NEXT: v_med3_i32 v3, 0xffff8000, v4, v5
; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v1, v1, v3
; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v2, 16, v0
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
; GFX11-GISEL-TRUE16-LABEL: test_ldexp_v3f16_v3i32:
; GFX11-GISEL-TRUE16: ; %bb.0:
; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-TRUE16-NEXT: v_mov_b32_e32 v5, 0x7fff
; GFX11-GISEL-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 16, v0
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-GISEL-TRUE16-NEXT: v_med3_i32 v2, 0xffff8000, v2, v5
; GFX11-GISEL-TRUE16-NEXT: v_med3_i32 v3, 0xffff8000, v3, v5
; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v2.l
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v0.h, v6.l, v3.l
; GFX11-GISEL-TRUE16-NEXT: v_med3_i32 v3, 0xffff8000, v4, v5
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v1.l, v1.l, v3.l
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-GISEL-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX11-GISEL-TRUE16-NEXT: v_lshl_or_b32 v0, v0, 16, v2
; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-GISEL-FAKE16-LABEL: test_ldexp_v3f16_v3i32:
; GFX11-GISEL-FAKE16: ; %bb.0:
; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-FAKE16-NEXT: v_mov_b32_e32 v5, 0x7fff
; GFX11-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v0
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-GISEL-FAKE16-NEXT: v_med3_i32 v2, 0xffff8000, v2, v5
; GFX11-GISEL-FAKE16-NEXT: v_med3_i32 v3, 0xffff8000, v3, v5
; GFX11-GISEL-FAKE16-NEXT: v_ldexp_f16_e32 v0, v0, v2
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX11-GISEL-FAKE16-NEXT: v_ldexp_f16_e32 v2, v6, v3
; GFX11-GISEL-FAKE16-NEXT: v_med3_i32 v3, 0xffff8000, v4, v5
; GFX11-GISEL-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-GISEL-FAKE16-NEXT: v_ldexp_f16_e32 v1, v1, v3
; GFX11-GISEL-FAKE16-NEXT: v_lshl_or_b32 v0, v2, 16, v0
; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
%result = call <3 x half> @llvm.ldexp.v3f16.v3i32(<3 x half> %a, <3 x i32> %b)
ret <3 x half> %result
}
@ -723,17 +888,32 @@ define <3 x half> @test_ldexp_v3f16_v3i16(<3 x half> %a, <3 x i16> %b) {
; GFX9-SDAG-NEXT: v_pack_b32_f16 v0, v0, v4
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: test_ldexp_v3f16_v3i16:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v4, 16, v2
; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v5, 16, v0
; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v2
; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v1, v1, v3
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v2, v5, v4
; GFX11-SDAG-NEXT: v_pack_b32_f16 v0, v0, v2
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
; GFX11-SDAG-TRUE16-LABEL: test_ldexp_v3f16_v3i16:
; GFX11-SDAG-TRUE16: ; %bb.0:
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v5, 16, v0
; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v2.l
; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v1.l, v1.l, v3.l
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.h, v5.l, v4.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v2, v0
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-FAKE16-LABEL: test_ldexp_v3f16_v3i16:
; GFX11-SDAG-FAKE16: ; %bb.0:
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v0
; GFX11-SDAG-FAKE16-NEXT: v_ldexp_f16_e32 v0, v0, v2
; GFX11-SDAG-FAKE16-NEXT: v_ldexp_f16_e32 v1, v1, v3
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-FAKE16-NEXT: v_ldexp_f16_e32 v2, v5, v4
; GFX11-SDAG-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v2
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX6-GISEL-LABEL: test_ldexp_v3f16_v3i16:
; GFX6-GISEL: ; %bb.0:
@ -770,19 +950,36 @@ define <3 x half> @test_ldexp_v3f16_v3i16(<3 x half> %a, <3 x i16> %b) {
; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v4
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: test_ldexp_v3f16_v3i16:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0
; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v2
; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v2
; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v1, v1, v3
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v2, v4, v5
; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v2, 16, v0
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
; GFX11-GISEL-TRUE16-LABEL: test_ldexp_v3f16_v3i16:
; GFX11-GISEL-TRUE16: ; %bb.0:
; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 16, v0
; GFX11-GISEL-TRUE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v2.l
; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v1.l, v1.l, v3.l
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v0.h, v4.l, v5.l
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
; GFX11-GISEL-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-TRUE16-NEXT: v_lshl_or_b32 v0, v0, 16, v2
; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-GISEL-FAKE16-LABEL: test_ldexp_v3f16_v3i16:
; GFX11-GISEL-FAKE16: ; %bb.0:
; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v0
; GFX11-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
; GFX11-GISEL-FAKE16-NEXT: v_ldexp_f16_e32 v0, v0, v2
; GFX11-GISEL-FAKE16-NEXT: v_ldexp_f16_e32 v1, v1, v3
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-GISEL-FAKE16-NEXT: v_ldexp_f16_e32 v2, v4, v5
; GFX11-GISEL-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-FAKE16-NEXT: v_lshl_or_b32 v0, v2, 16, v0
; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
%result = call <3 x half> @llvm.ldexp.v3f16.v3i16(<3 x half> %a, <3 x i16> %b)
ret <3 x half> %result
}
@ -839,26 +1036,53 @@ define <4 x half> @test_ldexp_v4f16_v4i32(<4 x half> %a, <4 x i32> %b) {
; GFX9-SDAG-NEXT: v_pack_b32_f16 v1, v1, v5
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: test_ldexp_v4f16_v4i32:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT: s_movk_i32 s0, 0x8000
; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v6, 16, v1
; GFX11-SDAG-NEXT: v_med3_i32 v5, v5, s0, 0x7fff
; GFX11-SDAG-NEXT: v_med3_i32 v3, v3, s0, 0x7fff
; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v7, 16, v0
; GFX11-SDAG-NEXT: v_med3_i32 v2, v2, s0, 0x7fff
; GFX11-SDAG-NEXT: v_med3_i32 v4, v4, s0, 0x7fff
; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v5, v6, v5
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v3, v7, v3
; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v2
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v1, v1, v4
; GFX11-SDAG-NEXT: v_pack_b32_f16 v0, v0, v3
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-SDAG-NEXT: v_pack_b32_f16 v1, v1, v5
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
; GFX11-SDAG-TRUE16-LABEL: test_ldexp_v4f16_v4i32:
; GFX11-SDAG-TRUE16: ; %bb.0:
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-TRUE16-NEXT: s_movk_i32 s0, 0x8000
; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 16, v0
; GFX11-SDAG-TRUE16-NEXT: v_med3_i32 v5, v5, s0, 0x7fff
; GFX11-SDAG-TRUE16-NEXT: v_med3_i32 v3, v3, s0, 0x7fff
; GFX11-SDAG-TRUE16-NEXT: v_med3_i32 v2, v2, s0, 0x7fff
; GFX11-SDAG-TRUE16-NEXT: v_med3_i32 v4, v4, s0, 0x7fff
; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v7, 16, v1
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.h, v6.l, v3.l
; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v2.l
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v1.l, v1.l, v4.l
; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v1.h, v7.l, v5.l
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v1.l
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v1.h
; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v2, v0
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v1, v3, v1
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-FAKE16-LABEL: test_ldexp_v4f16_v4i32:
; GFX11-SDAG-FAKE16: ; %bb.0:
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-FAKE16-NEXT: s_movk_i32 s0, 0x8000
; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v1
; GFX11-SDAG-FAKE16-NEXT: v_med3_i32 v5, v5, s0, 0x7fff
; GFX11-SDAG-FAKE16-NEXT: v_med3_i32 v3, v3, s0, 0x7fff
; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v0
; GFX11-SDAG-FAKE16-NEXT: v_med3_i32 v2, v2, s0, 0x7fff
; GFX11-SDAG-FAKE16-NEXT: v_med3_i32 v4, v4, s0, 0x7fff
; GFX11-SDAG-FAKE16-NEXT: v_ldexp_f16_e32 v5, v6, v5
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-SDAG-FAKE16-NEXT: v_ldexp_f16_e32 v3, v7, v3
; GFX11-SDAG-FAKE16-NEXT: v_ldexp_f16_e32 v0, v0, v2
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-FAKE16-NEXT: v_ldexp_f16_e32 v1, v1, v4
; GFX11-SDAG-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v3
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-SDAG-FAKE16-NEXT: v_pack_b32_f16 v1, v1, v5
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX6-GISEL-LABEL: test_ldexp_v4f16_v4i32:
; GFX6-GISEL: ; %bb.0:
@ -911,30 +1135,61 @@ define <4 x half> @test_ldexp_v4f16_v4i32(<4 x half> %a, <4 x i32> %b) {
; GFX9-GISEL-NEXT: v_lshl_or_b32 v1, v1, 16, v3
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: test_ldexp_v4f16_v4i32:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: v_mov_b32_e32 v6, 0x7fff
; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v7, 16, v0
; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v8, 16, v1
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_4)
; GFX11-GISEL-NEXT: v_med3_i32 v2, 0xffff8000, v2, v6
; GFX11-GISEL-NEXT: v_med3_i32 v4, 0xffff8000, v4, v6
; GFX11-GISEL-NEXT: v_med3_i32 v3, 0xffff8000, v3, v6
; GFX11-GISEL-NEXT: v_med3_i32 v5, 0xffff8000, v5, v6
; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v2
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v1, v1, v4
; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v2, v7, v3
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v3, v8, v5
; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v2, 16, v0
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-GISEL-NEXT: v_lshl_or_b32 v1, v3, 16, v1
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
; GFX11-GISEL-TRUE16-LABEL: test_ldexp_v4f16_v4i32:
; GFX11-GISEL-TRUE16: ; %bb.0:
; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-TRUE16-NEXT: v_mov_b32_e32 v6, 0x7fff
; GFX11-GISEL-TRUE16-NEXT: v_lshrrev_b32_e32 v7, 16, v0
; GFX11-GISEL-TRUE16-NEXT: v_lshrrev_b32_e32 v8, 16, v1
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_4)
; GFX11-GISEL-TRUE16-NEXT: v_med3_i32 v2, 0xffff8000, v2, v6
; GFX11-GISEL-TRUE16-NEXT: v_med3_i32 v4, 0xffff8000, v4, v6
; GFX11-GISEL-TRUE16-NEXT: v_med3_i32 v3, 0xffff8000, v3, v6
; GFX11-GISEL-TRUE16-NEXT: v_med3_i32 v5, 0xffff8000, v5, v6
; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v2.l
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v0.h, v1.l, v4.l
; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v1.l, v7.l, v3.l
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v1.h, v8.l, v5.l
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v3.l, v1.l
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v1.l, v1.h
; GFX11-GISEL-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-GISEL-TRUE16-NEXT: v_and_b32_e32 v4, 0xffff, v0
; GFX11-GISEL-TRUE16-NEXT: v_lshl_or_b32 v0, v3, 16, v2
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-GISEL-TRUE16-NEXT: v_lshl_or_b32 v1, v1, 16, v4
; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-GISEL-FAKE16-LABEL: test_ldexp_v4f16_v4i32:
; GFX11-GISEL-FAKE16: ; %bb.0:
; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-FAKE16-NEXT: v_mov_b32_e32 v6, 0x7fff
; GFX11-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v0
; GFX11-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v8, 16, v1
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_4)
; GFX11-GISEL-FAKE16-NEXT: v_med3_i32 v2, 0xffff8000, v2, v6
; GFX11-GISEL-FAKE16-NEXT: v_med3_i32 v4, 0xffff8000, v4, v6
; GFX11-GISEL-FAKE16-NEXT: v_med3_i32 v3, 0xffff8000, v3, v6
; GFX11-GISEL-FAKE16-NEXT: v_med3_i32 v5, 0xffff8000, v5, v6
; GFX11-GISEL-FAKE16-NEXT: v_ldexp_f16_e32 v0, v0, v2
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-GISEL-FAKE16-NEXT: v_ldexp_f16_e32 v1, v1, v4
; GFX11-GISEL-FAKE16-NEXT: v_ldexp_f16_e32 v2, v7, v3
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-GISEL-FAKE16-NEXT: v_ldexp_f16_e32 v3, v8, v5
; GFX11-GISEL-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-GISEL-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX11-GISEL-FAKE16-NEXT: v_lshl_or_b32 v0, v2, 16, v0
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-GISEL-FAKE16-NEXT: v_lshl_or_b32 v1, v3, 16, v1
; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
%result = call <4 x half> @llvm.ldexp.v4f16.v4i32(<4 x half> %a, <4 x i32> %b)
ret <4 x half> %result
}
@ -983,22 +1238,45 @@ define <4 x half> @test_ldexp_v4f16_v4i16(<4 x half> %a, <4 x i16> %b) {
; GFX9-SDAG-NEXT: v_pack_b32_f16 v1, v1, v4
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: test_ldexp_v4f16_v4i16:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v4, 16, v3
; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v5, 16, v2
; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v6, 16, v0
; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v7, 16, v1
; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v1, v1, v3
; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v2
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v2, v6, v5
; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v3, v7, v4
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-NEXT: v_pack_b32_f16 v0, v0, v2
; GFX11-SDAG-NEXT: v_pack_b32_f16 v1, v1, v3
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
; GFX11-SDAG-TRUE16-LABEL: test_ldexp_v4f16_v4i16:
; GFX11-SDAG-TRUE16: ; %bb.0:
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 16, v3
; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 16, v0
; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v7, 16, v1
; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v2.l
; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.h, v1.l, v3.l
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v1.l, v6.l, v5.l
; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v1.h, v7.l, v4.l
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.h
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v1.h
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v2, v0
; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v1, v3, v1
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-FAKE16-LABEL: test_ldexp_v4f16_v4i16:
; GFX11-SDAG-FAKE16: ; %bb.0:
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v3
; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v0
; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v1
; GFX11-SDAG-FAKE16-NEXT: v_ldexp_f16_e32 v1, v1, v3
; GFX11-SDAG-FAKE16-NEXT: v_ldexp_f16_e32 v0, v0, v2
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-SDAG-FAKE16-NEXT: v_ldexp_f16_e32 v2, v6, v5
; GFX11-SDAG-FAKE16-NEXT: v_ldexp_f16_e32 v3, v7, v4
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v2
; GFX11-SDAG-FAKE16-NEXT: v_pack_b32_f16 v1, v1, v3
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX6-GISEL-LABEL: test_ldexp_v4f16_v4i16:
; GFX6-GISEL: ; %bb.0:
@ -1043,25 +1321,51 @@ define <4 x half> @test_ldexp_v4f16_v4i16(<4 x half> %a, <4 x i16> %b) {
; GFX9-GISEL-NEXT: v_lshl_or_b32 v1, v1, 16, v2
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: test_ldexp_v4f16_v4i16:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0
; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v1
; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v6, 16, v2
; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v7, 16, v3
; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v2
; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v1, v1, v3
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v2, v4, v6
; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v3, v5, v7
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v2, 16, v0
; GFX11-GISEL-NEXT: v_lshl_or_b32 v1, v3, 16, v1
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
; GFX11-GISEL-TRUE16-LABEL: test_ldexp_v4f16_v4i16:
; GFX11-GISEL-TRUE16: ; %bb.0:
; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 16, v0
; GFX11-GISEL-TRUE16-NEXT: v_lshrrev_b32_e32 v5, 16, v1
; GFX11-GISEL-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
; GFX11-GISEL-TRUE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3
; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v2.l
; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v0.h, v1.l, v3.l
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v1.l, v4.l, v6.l
; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v1.h, v5.l, v7.l
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v3.l, v1.l
; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v1.l, v1.h
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-GISEL-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX11-GISEL-TRUE16-NEXT: v_and_b32_e32 v4, 0xffff, v0
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-GISEL-TRUE16-NEXT: v_lshl_or_b32 v0, v3, 16, v2
; GFX11-GISEL-TRUE16-NEXT: v_lshl_or_b32 v1, v1, 16, v4
; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-GISEL-FAKE16-LABEL: test_ldexp_v4f16_v4i16:
; GFX11-GISEL-FAKE16: ; %bb.0:
; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v0
; GFX11-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v1
; GFX11-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
; GFX11-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3
; GFX11-GISEL-FAKE16-NEXT: v_ldexp_f16_e32 v0, v0, v2
; GFX11-GISEL-FAKE16-NEXT: v_ldexp_f16_e32 v1, v1, v3
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-GISEL-FAKE16-NEXT: v_ldexp_f16_e32 v2, v4, v6
; GFX11-GISEL-FAKE16-NEXT: v_ldexp_f16_e32 v3, v5, v7
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-GISEL-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11-GISEL-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-GISEL-FAKE16-NEXT: v_lshl_or_b32 v0, v2, 16, v0
; GFX11-GISEL-FAKE16-NEXT: v_lshl_or_b32 v1, v3, 16, v1
; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
%result = call <4 x half> @llvm.ldexp.v4f16.v4i16(<4 x half> %a, <4 x i16> %b)
ret <4 x half> %result
}

View File

@ -783,49 +783,64 @@
# GFX11: v_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x59,0x56,0x34,0x12,0xaf]
0xff,0xfe,0xff,0x59,0x56,0x34,0x12,0xaf
# GFX11: v_ldexp_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x76]
# GFX11-FAKE16: v_ldexp_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x76]
# GFX11-REAL16: v_ldexp_f16_e32 v5.l, v1.l, v2.l ; encoding: [0x01,0x05,0x0a,0x76]
0x01,0x05,0x0a,0x76
# GFX11: v_ldexp_f16_e32 v5, v127, v2 ; encoding: [0x7f,0x05,0x0a,0x76]
# GFX11-FAKE16: v_ldexp_f16_e32 v5, v127, v2 ; encoding: [0x7f,0x05,0x0a,0x76]
# GFX11-REAL16: v_ldexp_f16_e32 v5.l, v127.l, v2.l ; encoding: [0x7f,0x05,0x0a,0x76]
0x7f,0x05,0x0a,0x76
# GFX11: v_ldexp_f16_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x76]
# GFX11-FAKE16: v_ldexp_f16_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x76]
# GFX11-REAL16: v_ldexp_f16_e32 v5.l, s1, v2.l ; encoding: [0x01,0x04,0x0a,0x76]
0x01,0x04,0x0a,0x76
# GFX11: v_ldexp_f16_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x76]
# GFX11-FAKE16: v_ldexp_f16_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x76]
# GFX11-REAL16: v_ldexp_f16_e32 v5.l, s105, v2.l ; encoding: [0x69,0x04,0x0a,0x76]
0x69,0x04,0x0a,0x76
# GFX11: v_ldexp_f16_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x76]
# GFX11-FAKE16: v_ldexp_f16_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x76]
# GFX11-REAL16: v_ldexp_f16_e32 v5.l, vcc_lo, v2.l ; encoding: [0x6a,0x04,0x0a,0x76]
0x6a,0x04,0x0a,0x76
# GFX11: v_ldexp_f16_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x76]
# GFX11-FAKE16: v_ldexp_f16_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x76]
# GFX11-REAL16: v_ldexp_f16_e32 v5.l, vcc_hi, v2.l ; encoding: [0x6b,0x04,0x0a,0x76]
0x6b,0x04,0x0a,0x76
# GFX11: v_ldexp_f16_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x76]
# GFX11-FAKE16: v_ldexp_f16_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x76]
# GFX11-REAL16: v_ldexp_f16_e32 v5.l, ttmp15, v2.l ; encoding: [0x7b,0x04,0x0a,0x76]
0x7b,0x04,0x0a,0x76
# GFX11: v_ldexp_f16_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x76]
# GFX11-FAKE16: v_ldexp_f16_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x76]
# GFX11-REAL16: v_ldexp_f16_e32 v5.l, m0, v2.l ; encoding: [0x7d,0x04,0x0a,0x76]
0x7d,0x04,0x0a,0x76
# GFX11: v_ldexp_f16_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x76]
# GFX11-FAKE16: v_ldexp_f16_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x76]
# GFX11-REAL16: v_ldexp_f16_e32 v5.l, exec_lo, v2.l ; encoding: [0x7e,0x04,0x0a,0x76]
0x7e,0x04,0x0a,0x76
# GFX11: v_ldexp_f16_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x76]
# GFX11-FAKE16: v_ldexp_f16_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x76]
# GFX11-REAL16: v_ldexp_f16_e32 v5.l, exec_hi, v2.l ; encoding: [0x7f,0x04,0x0a,0x76]
0x7f,0x04,0x0a,0x76
# GFX11: v_ldexp_f16_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x76]
# GFX11-FAKE16: v_ldexp_f16_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x76]
# GFX11-REAL16: v_ldexp_f16_e32 v5.l, null, v2.l ; encoding: [0x7c,0x04,0x0a,0x76]
0x7c,0x04,0x0a,0x76
# GFX11: v_ldexp_f16_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x76]
# GFX11-FAKE16: v_ldexp_f16_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x76]
# GFX11-REAL16: v_ldexp_f16_e32 v5.l, -1, v2.l ; encoding: [0xc1,0x04,0x0a,0x76]
0xc1,0x04,0x0a,0x76
# GFX11: v_ldexp_f16_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x76]
# GFX11-FAKE16: v_ldexp_f16_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x76]
# GFX11-REAL16: v_ldexp_f16_e32 v5.l, 0.5, v2.l ; encoding: [0xf0,0x04,0x0a,0x76]
0xf0,0x04,0x0a,0x76
# GFX11: v_ldexp_f16_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x76]
# GFX11-FAKE16: v_ldexp_f16_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x76]
# GFX11-REAL16: v_ldexp_f16_e32 v5.l, src_scc, v2.l ; encoding: [0xfd,0x04,0x0a,0x76]
0xfd,0x04,0x0a,0x76
# GFX11: v_ldexp_f16_e32 v127, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x76,0x0b,0xfe,0x00,0x00]
# GFX11-FAKE16: v_ldexp_f16_e32 v127, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x76,0x0b,0xfe,0x00,0x00]
# GFX11-REAL16: v_ldexp_f16_e32 v127.l, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0xfe,0x76,0x0b,0xfe,0x00,0x00]
0xff,0xfe,0xfe,0x76,0x0b,0xfe,0x00,0x00
# GFX11: v_lshlrev_b32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x30]