[AMDGPU] Use 64-bit literals in codegen on gfx1250 (#148727)
This commit is contained in:
parent
56a4f8d8c1
commit
a32040e483
@ -447,6 +447,35 @@ void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool IsGCN = CurDAG->getSubtarget().getTargetTriple().isAMDGCN();
|
||||||
|
if (IsGCN && Subtarget->has64BitLiterals() && VT.getSizeInBits() == 64 &&
|
||||||
|
CurDAG->isConstantValueOfAnyType(SDValue(N, 0))) {
|
||||||
|
uint64_t C = 0;
|
||||||
|
bool AllConst = true;
|
||||||
|
unsigned EltSize = EltVT.getSizeInBits();
|
||||||
|
for (unsigned I = 0; I < NumVectorElts; ++I) {
|
||||||
|
SDValue Op = N->getOperand(I);
|
||||||
|
if (Op.isUndef()) {
|
||||||
|
AllConst = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
uint64_t Val;
|
||||||
|
if (ConstantFPSDNode *CF = dyn_cast<ConstantFPSDNode>(Op)) {
|
||||||
|
Val = CF->getValueAPF().bitcastToAPInt().getZExtValue();
|
||||||
|
} else
|
||||||
|
Val = cast<ConstantSDNode>(Op)->getZExtValue();
|
||||||
|
C |= Val << (EltSize * I);
|
||||||
|
}
|
||||||
|
if (AllConst) {
|
||||||
|
SDValue CV = CurDAG->getTargetConstant(C, DL, MVT::i64);
|
||||||
|
MachineSDNode *Copy =
|
||||||
|
CurDAG->getMachineNode(AMDGPU::S_MOV_B64_IMM_PSEUDO, DL, VT, CV);
|
||||||
|
CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, VT, SDValue(Copy, 0),
|
||||||
|
RegClass);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
assert(NumVectorElts <= 32 && "Vectors with more than 32 elements not "
|
assert(NumVectorElts <= 32 && "Vectors with more than 32 elements not "
|
||||||
"supported yet");
|
"supported yet");
|
||||||
// 32 = Max Num Vector Elements
|
// 32 = Max Num Vector Elements
|
||||||
@ -454,7 +483,6 @@ void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
|
|||||||
// 1 = Vector Register Class
|
// 1 = Vector Register Class
|
||||||
SmallVector<SDValue, 32 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);
|
SmallVector<SDValue, 32 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);
|
||||||
|
|
||||||
bool IsGCN = CurDAG->getSubtarget().getTargetTriple().isAMDGCN();
|
|
||||||
RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
|
RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
|
||||||
bool IsRegSeq = true;
|
bool IsRegSeq = true;
|
||||||
unsigned NOps = N->getNumOperands();
|
unsigned NOps = N->getNumOperands();
|
||||||
@ -676,7 +704,8 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
|
|||||||
|
|
||||||
case ISD::Constant:
|
case ISD::Constant:
|
||||||
case ISD::ConstantFP: {
|
case ISD::ConstantFP: {
|
||||||
if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
|
if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N) ||
|
||||||
|
Subtarget->has64BitLiterals())
|
||||||
break;
|
break;
|
||||||
|
|
||||||
uint64_t Imm;
|
uint64_t Imm;
|
||||||
|
@ -12155,6 +12155,11 @@ SDValue SITargetLowering::splitBinaryBitConstantOp(
|
|||||||
if ((bitOpWithConstantIsReducible(Opc, ValLo) ||
|
if ((bitOpWithConstantIsReducible(Opc, ValLo) ||
|
||||||
bitOpWithConstantIsReducible(Opc, ValHi)) ||
|
bitOpWithConstantIsReducible(Opc, ValHi)) ||
|
||||||
(CRHS->hasOneUse() && !TII->isInlineConstant(CRHS->getAPIntValue()))) {
|
(CRHS->hasOneUse() && !TII->isInlineConstant(CRHS->getAPIntValue()))) {
|
||||||
|
// We have 64-bit scalar and/or/xor, but do not have vector forms.
|
||||||
|
if (Subtarget->has64BitLiterals() && CRHS->hasOneUse() &&
|
||||||
|
!CRHS->user_begin()->isDivergent())
|
||||||
|
return SDValue();
|
||||||
|
|
||||||
// If we need to materialize a 64-bit immediate, it will be split up later
|
// If we need to materialize a 64-bit immediate, it will be split up later
|
||||||
// anyway. Avoid creating the harder to understand 64-bit immediate
|
// anyway. Avoid creating the harder to understand 64-bit immediate
|
||||||
// materialization.
|
// materialization.
|
||||||
|
@ -2273,6 +2273,12 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
|
|||||||
case AMDGPU::S_MOV_B64_IMM_PSEUDO: {
|
case AMDGPU::S_MOV_B64_IMM_PSEUDO: {
|
||||||
const MachineOperand &SrcOp = MI.getOperand(1);
|
const MachineOperand &SrcOp = MI.getOperand(1);
|
||||||
assert(!SrcOp.isFPImm());
|
assert(!SrcOp.isFPImm());
|
||||||
|
|
||||||
|
if (ST.has64BitLiterals()) {
|
||||||
|
MI.setDesc(get(AMDGPU::S_MOV_B64));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
APInt Imm(64, SrcOp.getImm());
|
APInt Imm(64, SrcOp.getImm());
|
||||||
if (Imm.isIntN(32) || isInlineConstant(Imm)) {
|
if (Imm.isIntN(32) || isInlineConstant(Imm)) {
|
||||||
MI.setDesc(get(AMDGPU::S_MOV_B64));
|
MI.setDesc(get(AMDGPU::S_MOV_B64));
|
||||||
@ -6099,14 +6105,18 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
|
|||||||
OpInfo.OperandType == AMDGPU::OPERAND_REG_IMM_V2FP32;
|
OpInfo.OperandType == AMDGPU::OPERAND_REG_IMM_V2FP32;
|
||||||
if (Is64BitOp &&
|
if (Is64BitOp &&
|
||||||
!AMDGPU::isInlinableLiteral64(Imm, ST.hasInv2PiInlineImm())) {
|
!AMDGPU::isInlinableLiteral64(Imm, ST.hasInv2PiInlineImm())) {
|
||||||
if (!AMDGPU::isValid32BitLiteral(Imm, Is64BitFPOp))
|
if (!AMDGPU::isValid32BitLiteral(Imm, Is64BitFPOp) &&
|
||||||
|
(!ST.has64BitLiterals() || InstDesc.getSize() != 4))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
// FIXME: We can use sign extended 64-bit literals, but only for signed
|
// FIXME: We can use sign extended 64-bit literals, but only for signed
|
||||||
// operands. At the moment we do not know if an operand is signed.
|
// operands. At the moment we do not know if an operand is signed.
|
||||||
// Such operand will be encoded as its low 32 bits and then either
|
// Such operand will be encoded as its low 32 bits and then either
|
||||||
// correctly sign extended or incorrectly zero extended by HW.
|
// correctly sign extended or incorrectly zero extended by HW.
|
||||||
if (!Is64BitFPOp && (int32_t)Imm < 0)
|
// If 64-bit literals are supported and the literal will be encoded
|
||||||
|
// as a full 64-bit value, we can still use it.
|
||||||
|
if (!Is64BitFPOp && (int32_t)Imm < 0 &&
|
||||||
|
(!ST.has64BitLiterals() || AMDGPU::isValid32BitLiteral(Imm, false)))
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -9178,15 +9188,30 @@ unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
|
|||||||
if (isDPP(MI))
|
if (isDPP(MI))
|
||||||
return DescSize;
|
return DescSize;
|
||||||
bool HasLiteral = false;
|
bool HasLiteral = false;
|
||||||
|
unsigned LiteralSize = 4;
|
||||||
for (int I = 0, E = MI.getNumExplicitOperands(); I != E; ++I) {
|
for (int I = 0, E = MI.getNumExplicitOperands(); I != E; ++I) {
|
||||||
const MachineOperand &Op = MI.getOperand(I);
|
const MachineOperand &Op = MI.getOperand(I);
|
||||||
const MCOperandInfo &OpInfo = Desc.operands()[I];
|
const MCOperandInfo &OpInfo = Desc.operands()[I];
|
||||||
if (!Op.isReg() && !isInlineConstant(Op, OpInfo)) {
|
if (!Op.isReg() && !isInlineConstant(Op, OpInfo)) {
|
||||||
HasLiteral = true;
|
HasLiteral = true;
|
||||||
|
if (ST.has64BitLiterals()) {
|
||||||
|
switch (OpInfo.OperandType) {
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
case AMDGPU::OPERAND_REG_IMM_FP64:
|
||||||
|
if (!AMDGPU::isValid32BitLiteral(Op.getImm(), true))
|
||||||
|
LiteralSize = 8;
|
||||||
|
break;
|
||||||
|
case AMDGPU::OPERAND_REG_IMM_INT64:
|
||||||
|
if (!Op.isImm() || !AMDGPU::isValid32BitLiteral(Op.getImm(), false))
|
||||||
|
LiteralSize = 8;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return HasLiteral ? DescSize + 4 : DescSize;
|
return HasLiteral ? DescSize + LiteralSize : DescSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check whether we have extra NSA words.
|
// Check whether we have extra NSA words.
|
||||||
|
@ -1058,7 +1058,11 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
|
|||||||
// fold an immediate into the shrunk instruction as a literal operand. In
|
// fold an immediate into the shrunk instruction as a literal operand. In
|
||||||
// GFX10 VOP3 instructions can take a literal operand anyway, so there is
|
// GFX10 VOP3 instructions can take a literal operand anyway, so there is
|
||||||
// no advantage to doing this.
|
// no advantage to doing this.
|
||||||
if (ST->hasVOP3Literal() && !IsPostRA)
|
// However, if 64-bit literals are allowed, we still need to shrink the
|
||||||
|
// instruction so that such a literal can be folded.
|
||||||
|
if (ST->hasVOP3Literal() &&
|
||||||
|
(!ST->has64BitLiterals() || AMDGPU::isTrue16Inst(MI.getOpcode())) &&
|
||||||
|
!IsPostRA)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if (ST->hasTrue16BitInsts() && AMDGPU::isTrue16Inst(MI.getOpcode()) &&
|
if (ST->hasTrue16BitInsts() && AMDGPU::isTrue16Inst(MI.getOpcode()) &&
|
||||||
|
@ -1,8 +1,10 @@
|
|||||||
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
||||||
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -show-mc-encoding < %s | FileCheck -check-prefixes=GFX9,NOT-GFX12 %s
|
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -show-mc-encoding < %s | FileCheck -check-prefixes=GFX9,NOT-GFX12 %s
|
||||||
; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -show-mc-encoding < %s | FileCheck -check-prefixes=GFX10,NOT-GFX12 %s
|
; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -show-mc-encoding < %s | FileCheck -check-prefixes=GFX10,NOT-GFX12 %s
|
||||||
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -show-mc-encoding < %s | FileCheck -check-prefixes=GFX11,GFX1100,NOT-GFX12 %s
|
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -show-mc-encoding < %s | FileCheck -check-prefixes=GFX11,GFX1100,NOT-GFX12 %s
|
||||||
; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -show-mc-encoding < %s | FileCheck -check-prefixes=GFX11,GFX1150,NOT-GFX12 %s
|
; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -show-mc-encoding < %s | FileCheck -check-prefixes=GFX11,GFX1150,NOT-GFX12 %s
|
||||||
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -show-mc-encoding < %s | FileCheck -check-prefixes=GFX1200 %s
|
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -show-mc-encoding < %s | FileCheck -check-prefixes=GFX1200 %s
|
||||||
|
; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -show-mc-encoding < %s | FileCheck -check-prefixes=GFX1250 %s
|
||||||
|
|
||||||
declare float @llvm.fabs.f32(float)
|
declare float @llvm.fabs.f32(float)
|
||||||
declare float @llvm.fma.f32(float, float, float)
|
declare float @llvm.fma.f32(float, float, float)
|
||||||
@ -35,11 +37,19 @@ define float @v_mul_f32_vop2(float %x, float %y) {
|
|||||||
; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
|
; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
|
||||||
; GFX1200-NEXT: v_mul_f32_e32 v0, v0, v1 ; encoding: [0x00,0x03,0x00,0x10]
|
; GFX1200-NEXT: v_mul_f32_e32 v0, v0, v1 ; encoding: [0x00,0x03,0x00,0x10]
|
||||||
; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
|
; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
|
||||||
|
;
|
||||||
|
; GFX1250-LABEL: v_mul_f32_vop2:
|
||||||
|
; GFX1250: ; %bb.0:
|
||||||
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
|
||||||
|
; GFX1250-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
|
||||||
|
; GFX1250-NEXT: v_mul_f32_e32 v0, v0, v1 ; encoding: [0x00,0x03,0x00,0x10]
|
||||||
|
; GFX1250-NEXT: s_set_pc_i64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
|
||||||
%mul = fmul float %x, %y
|
%mul = fmul float %x, %y
|
||||||
ret float %mul
|
ret float %mul
|
||||||
}
|
}
|
||||||
; NOT-GFX12: codeLenInByte = 12
|
; NOT-GFX12: codeLenInByte = 12
|
||||||
; GFX1200: codeLenInByte = 28
|
; GFX1200: codeLenInByte = 28
|
||||||
|
; GFX1250: codeLenInByte = 16
|
||||||
|
|
||||||
define float @v_mul_f32_vop2_inline_imm(float %x) {
|
define float @v_mul_f32_vop2_inline_imm(float %x) {
|
||||||
; GFX9-LABEL: v_mul_f32_vop2_inline_imm:
|
; GFX9-LABEL: v_mul_f32_vop2_inline_imm:
|
||||||
@ -69,11 +79,19 @@ define float @v_mul_f32_vop2_inline_imm(float %x) {
|
|||||||
; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
|
; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
|
||||||
; GFX1200-NEXT: v_mul_f32_e32 v0, 4.0, v0 ; encoding: [0xf6,0x00,0x00,0x10]
|
; GFX1200-NEXT: v_mul_f32_e32 v0, 4.0, v0 ; encoding: [0xf6,0x00,0x00,0x10]
|
||||||
; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
|
; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
|
||||||
|
;
|
||||||
|
; GFX1250-LABEL: v_mul_f32_vop2_inline_imm:
|
||||||
|
; GFX1250: ; %bb.0:
|
||||||
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
|
||||||
|
; GFX1250-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
|
||||||
|
; GFX1250-NEXT: v_mul_f32_e32 v0, 4.0, v0 ; encoding: [0xf6,0x00,0x00,0x10]
|
||||||
|
; GFX1250-NEXT: s_set_pc_i64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
|
||||||
%mul = fmul float %x, 4.0
|
%mul = fmul float %x, 4.0
|
||||||
ret float %mul
|
ret float %mul
|
||||||
}
|
}
|
||||||
; NOT-GFX12: codeLenInByte = 12
|
; NOT-GFX12: codeLenInByte = 12
|
||||||
; GFX1200: codeLenInByte = 28
|
; GFX1200: codeLenInByte = 28
|
||||||
|
; GFX1250: codeLenInByte = 16
|
||||||
|
|
||||||
define float @v_mul_f32_vop2_literal(float %x) {
|
define float @v_mul_f32_vop2_literal(float %x) {
|
||||||
; GFX9-LABEL: v_mul_f32_vop2_literal:
|
; GFX9-LABEL: v_mul_f32_vop2_literal:
|
||||||
@ -103,11 +121,19 @@ define float @v_mul_f32_vop2_literal(float %x) {
|
|||||||
; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
|
; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
|
||||||
; GFX1200-NEXT: v_mul_f32_e32 v0, 0x42f60000, v0 ; encoding: [0xff,0x00,0x00,0x10,0x00,0x00,0xf6,0x42]
|
; GFX1200-NEXT: v_mul_f32_e32 v0, 0x42f60000, v0 ; encoding: [0xff,0x00,0x00,0x10,0x00,0x00,0xf6,0x42]
|
||||||
; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
|
; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
|
||||||
|
;
|
||||||
|
; GFX1250-LABEL: v_mul_f32_vop2_literal:
|
||||||
|
; GFX1250: ; %bb.0:
|
||||||
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
|
||||||
|
; GFX1250-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
|
||||||
|
; GFX1250-NEXT: v_mul_f32_e32 v0, 0x42f60000, v0 ; encoding: [0xff,0x00,0x00,0x10,0x00,0x00,0xf6,0x42]
|
||||||
|
; GFX1250-NEXT: s_set_pc_i64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
|
||||||
%mul = fmul float %x, 123.0
|
%mul = fmul float %x, 123.0
|
||||||
ret float %mul
|
ret float %mul
|
||||||
}
|
}
|
||||||
; NOT-GFX12: codeLenInByte = 16
|
; NOT-GFX12: codeLenInByte = 16
|
||||||
; GFX1200: codeLenInByte = 32
|
; GFX1200: codeLenInByte = 32
|
||||||
|
; GFX1250: codeLenInByte = 20
|
||||||
|
|
||||||
define float @v_mul_f32_vop3_src_mods(float %x, float %y) {
|
define float @v_mul_f32_vop3_src_mods(float %x, float %y) {
|
||||||
; GFX9-LABEL: v_mul_f32_vop3_src_mods:
|
; GFX9-LABEL: v_mul_f32_vop3_src_mods:
|
||||||
@ -137,12 +163,20 @@ define float @v_mul_f32_vop3_src_mods(float %x, float %y) {
|
|||||||
; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
|
; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
|
||||||
; GFX1200-NEXT: v_mul_f32_e64 v0, |v0|, v1 ; encoding: [0x00,0x01,0x08,0xd5,0x00,0x03,0x02,0x00]
|
; GFX1200-NEXT: v_mul_f32_e64 v0, |v0|, v1 ; encoding: [0x00,0x01,0x08,0xd5,0x00,0x03,0x02,0x00]
|
||||||
; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
|
; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
|
||||||
|
;
|
||||||
|
; GFX1250-LABEL: v_mul_f32_vop3_src_mods:
|
||||||
|
; GFX1250: ; %bb.0:
|
||||||
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
|
||||||
|
; GFX1250-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
|
||||||
|
; GFX1250-NEXT: v_mul_f32_e64 v0, |v0|, v1 ; encoding: [0x00,0x01,0x08,0xd5,0x00,0x03,0x02,0x00]
|
||||||
|
; GFX1250-NEXT: s_set_pc_i64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
|
||||||
%fabs.x = call float @llvm.fabs.f32(float %x)
|
%fabs.x = call float @llvm.fabs.f32(float %x)
|
||||||
%mul = fmul float %fabs.x, %y
|
%mul = fmul float %fabs.x, %y
|
||||||
ret float %mul
|
ret float %mul
|
||||||
}
|
}
|
||||||
; NOT-GFX12: codeLenInByte = 16
|
; NOT-GFX12: codeLenInByte = 16
|
||||||
; GFX1200: codeLenInByte = 32
|
; GFX1200: codeLenInByte = 32
|
||||||
|
; GFX1250: codeLenInByte = 20
|
||||||
|
|
||||||
define float @v_mul_f32_vop3_src_mods_inline_imm(float %x, float %y) {
|
define float @v_mul_f32_vop3_src_mods_inline_imm(float %x, float %y) {
|
||||||
; GFX9-LABEL: v_mul_f32_vop3_src_mods_inline_imm:
|
; GFX9-LABEL: v_mul_f32_vop3_src_mods_inline_imm:
|
||||||
@ -172,6 +206,13 @@ define float @v_mul_f32_vop3_src_mods_inline_imm(float %x, float %y) {
|
|||||||
; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
|
; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
|
||||||
; GFX1200-NEXT: v_mul_f32_e64 v0, |v0|, 4.0 ; encoding: [0x00,0x01,0x08,0xd5,0x00,0xed,0x01,0x00]
|
; GFX1200-NEXT: v_mul_f32_e64 v0, |v0|, 4.0 ; encoding: [0x00,0x01,0x08,0xd5,0x00,0xed,0x01,0x00]
|
||||||
; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
|
; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
|
||||||
|
;
|
||||||
|
; GFX1250-LABEL: v_mul_f32_vop3_src_mods_inline_imm:
|
||||||
|
; GFX1250: ; %bb.0:
|
||||||
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
|
||||||
|
; GFX1250-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
|
||||||
|
; GFX1250-NEXT: v_mul_f32_e64 v0, |v0|, 4.0 ; encoding: [0x00,0x01,0x08,0xd5,0x00,0xed,0x01,0x00]
|
||||||
|
; GFX1250-NEXT: s_set_pc_i64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
|
||||||
%fabs.x = call float @llvm.fabs.f32(float %x)
|
%fabs.x = call float @llvm.fabs.f32(float %x)
|
||||||
%mul = fmul float %fabs.x, 4.0
|
%mul = fmul float %fabs.x, 4.0
|
||||||
ret float %mul
|
ret float %mul
|
||||||
@ -179,6 +220,7 @@ define float @v_mul_f32_vop3_src_mods_inline_imm(float %x, float %y) {
|
|||||||
|
|
||||||
; NOT-GFX12: codeLenInByte = 16
|
; NOT-GFX12: codeLenInByte = 16
|
||||||
; GFX1200: codeLenInByte = 32
|
; GFX1200: codeLenInByte = 32
|
||||||
|
; GFX1250: codeLenInByte = 20
|
||||||
|
|
||||||
define float @v_mul_f32_vop3_src_mods_literal(float %x, float %y) {
|
define float @v_mul_f32_vop3_src_mods_literal(float %x, float %y) {
|
||||||
; GFX9-LABEL: v_mul_f32_vop3_src_mods_literal:
|
; GFX9-LABEL: v_mul_f32_vop3_src_mods_literal:
|
||||||
@ -209,6 +251,13 @@ define float @v_mul_f32_vop3_src_mods_literal(float %x, float %y) {
|
|||||||
; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
|
; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
|
||||||
; GFX1200-NEXT: v_mul_f32_e64 v0, 0x42f60000, |v0| ; encoding: [0x00,0x02,0x08,0xd5,0xff,0x00,0x02,0x00,0x00,0x00,0xf6,0x42]
|
; GFX1200-NEXT: v_mul_f32_e64 v0, 0x42f60000, |v0| ; encoding: [0x00,0x02,0x08,0xd5,0xff,0x00,0x02,0x00,0x00,0x00,0xf6,0x42]
|
||||||
; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
|
; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
|
||||||
|
;
|
||||||
|
; GFX1250-LABEL: v_mul_f32_vop3_src_mods_literal:
|
||||||
|
; GFX1250: ; %bb.0:
|
||||||
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
|
||||||
|
; GFX1250-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
|
||||||
|
; GFX1250-NEXT: v_mul_f32_e64 v0, 0x42f60000, |v0| ; encoding: [0x00,0x02,0x08,0xd5,0xff,0x00,0x02,0x00,0x00,0x00,0xf6,0x42]
|
||||||
|
; GFX1250-NEXT: s_set_pc_i64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
|
||||||
%fabs.x = call float @llvm.fabs.f32(float %x)
|
%fabs.x = call float @llvm.fabs.f32(float %x)
|
||||||
%mul = fmul float %fabs.x, 123.0
|
%mul = fmul float %fabs.x, 123.0
|
||||||
ret float %mul
|
ret float %mul
|
||||||
@ -218,6 +267,7 @@ define float @v_mul_f32_vop3_src_mods_literal(float %x, float %y) {
|
|||||||
; GFX10: codeLenInByte = 20
|
; GFX10: codeLenInByte = 20
|
||||||
; GFX11: codeLenInByte = 20
|
; GFX11: codeLenInByte = 20
|
||||||
; GFX1200: codeLenInByte = 36
|
; GFX1200: codeLenInByte = 36
|
||||||
|
; GFX1250: codeLenInByte = 24
|
||||||
|
|
||||||
define float @v_mul_f32_vop2_frame_index(float %x) {
|
define float @v_mul_f32_vop2_frame_index(float %x) {
|
||||||
; GFX9-LABEL: v_mul_f32_vop2_frame_index:
|
; GFX9-LABEL: v_mul_f32_vop2_frame_index:
|
||||||
@ -249,6 +299,13 @@ define float @v_mul_f32_vop2_frame_index(float %x) {
|
|||||||
; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
|
; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
|
||||||
; GFX1200-NEXT: v_mul_f32_e32 v0, s32, v0 ; encoding: [0x20,0x00,0x00,0x10]
|
; GFX1200-NEXT: v_mul_f32_e32 v0, s32, v0 ; encoding: [0x20,0x00,0x00,0x10]
|
||||||
; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
|
; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
|
||||||
|
;
|
||||||
|
; GFX1250-LABEL: v_mul_f32_vop2_frame_index:
|
||||||
|
; GFX1250: ; %bb.0:
|
||||||
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
|
||||||
|
; GFX1250-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
|
||||||
|
; GFX1250-NEXT: v_mul_f32_e32 v0, s32, v0 ; encoding: [0x20,0x00,0x00,0x10]
|
||||||
|
; GFX1250-NEXT: s_set_pc_i64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
|
||||||
%alloca = alloca i32, addrspace(5)
|
%alloca = alloca i32, addrspace(5)
|
||||||
%ptrtoint = ptrtoint ptr addrspace(5) %alloca to i32
|
%ptrtoint = ptrtoint ptr addrspace(5) %alloca to i32
|
||||||
%cast = bitcast i32 %ptrtoint to float
|
%cast = bitcast i32 %ptrtoint to float
|
||||||
@ -260,6 +317,7 @@ define float @v_mul_f32_vop2_frame_index(float %x) {
|
|||||||
; GFX10: codeLenInByte = 20
|
; GFX10: codeLenInByte = 20
|
||||||
; GFX11: codeLenInByte = 12
|
; GFX11: codeLenInByte = 12
|
||||||
; GFX1200: codeLenInByte = 28
|
; GFX1200: codeLenInByte = 28
|
||||||
|
; GFX1250: codeLenInByte = 16
|
||||||
|
|
||||||
define float @v_fma_f32(float %x, float %y, float %z) {
|
define float @v_fma_f32(float %x, float %y, float %z) {
|
||||||
; GFX9-LABEL: v_fma_f32:
|
; GFX9-LABEL: v_fma_f32:
|
||||||
@ -289,12 +347,20 @@ define float @v_fma_f32(float %x, float %y, float %z) {
|
|||||||
; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
|
; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
|
||||||
; GFX1200-NEXT: v_fma_f32 v0, v0, v1, v2 ; encoding: [0x00,0x00,0x13,0xd6,0x00,0x03,0x0a,0x04]
|
; GFX1200-NEXT: v_fma_f32 v0, v0, v1, v2 ; encoding: [0x00,0x00,0x13,0xd6,0x00,0x03,0x0a,0x04]
|
||||||
; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
|
; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
|
||||||
|
;
|
||||||
|
; GFX1250-LABEL: v_fma_f32:
|
||||||
|
; GFX1250: ; %bb.0:
|
||||||
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
|
||||||
|
; GFX1250-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
|
||||||
|
; GFX1250-NEXT: v_fma_f32 v0, v0, v1, v2 ; encoding: [0x00,0x00,0x13,0xd6,0x00,0x03,0x0a,0x04]
|
||||||
|
; GFX1250-NEXT: s_set_pc_i64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
|
||||||
%fma = call float @llvm.fma.f32(float %x, float %y, float %z)
|
%fma = call float @llvm.fma.f32(float %x, float %y, float %z)
|
||||||
ret float %fma
|
ret float %fma
|
||||||
}
|
}
|
||||||
|
|
||||||
; NOT-GFX12: codeLenInByte = 16
|
; NOT-GFX12: codeLenInByte = 16
|
||||||
; GFX1200: codeLenInByte = 32
|
; GFX1200: codeLenInByte = 32
|
||||||
|
; GFX1250: codeLenInByte = 20
|
||||||
|
|
||||||
define float @v_fma_f32_src_mods(float %x, float %y, float %z) {
|
define float @v_fma_f32_src_mods(float %x, float %y, float %z) {
|
||||||
; GFX9-LABEL: v_fma_f32_src_mods:
|
; GFX9-LABEL: v_fma_f32_src_mods:
|
||||||
@ -324,6 +390,13 @@ define float @v_fma_f32_src_mods(float %x, float %y, float %z) {
|
|||||||
; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
|
; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
|
||||||
; GFX1200-NEXT: v_fma_f32 v0, |v0|, v1, v2 ; encoding: [0x00,0x01,0x13,0xd6,0x00,0x03,0x0a,0x04]
|
; GFX1200-NEXT: v_fma_f32 v0, |v0|, v1, v2 ; encoding: [0x00,0x01,0x13,0xd6,0x00,0x03,0x0a,0x04]
|
||||||
; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
|
; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
|
||||||
|
;
|
||||||
|
; GFX1250-LABEL: v_fma_f32_src_mods:
|
||||||
|
; GFX1250: ; %bb.0:
|
||||||
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
|
||||||
|
; GFX1250-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
|
||||||
|
; GFX1250-NEXT: v_fma_f32 v0, |v0|, v1, v2 ; encoding: [0x00,0x01,0x13,0xd6,0x00,0x03,0x0a,0x04]
|
||||||
|
; GFX1250-NEXT: s_set_pc_i64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
|
||||||
%fabs.x = call float @llvm.fabs.f32(float %x)
|
%fabs.x = call float @llvm.fabs.f32(float %x)
|
||||||
%fma = call float @llvm.fma.f32(float %fabs.x, float %y, float %z)
|
%fma = call float @llvm.fma.f32(float %fabs.x, float %y, float %z)
|
||||||
ret float %fma
|
ret float %fma
|
||||||
@ -331,6 +404,7 @@ define float @v_fma_f32_src_mods(float %x, float %y, float %z) {
|
|||||||
|
|
||||||
; NOT-GFX12: codeLenInByte = 16
|
; NOT-GFX12: codeLenInByte = 16
|
||||||
; GFX1200: codeLenInByte = 32
|
; GFX1200: codeLenInByte = 32
|
||||||
|
; GFX1250: codeLenInByte = 20
|
||||||
|
|
||||||
define float @v_fmac_f32(float %x, float %y) {
|
define float @v_fmac_f32(float %x, float %y) {
|
||||||
; GFX9-LABEL: v_fmac_f32:
|
; GFX9-LABEL: v_fmac_f32:
|
||||||
@ -360,6 +434,13 @@ define float @v_fmac_f32(float %x, float %y) {
|
|||||||
; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
|
; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
|
||||||
; GFX1200-NEXT: v_fmac_f32_e32 v0, v0, v1 ; encoding: [0x00,0x03,0x00,0x56]
|
; GFX1200-NEXT: v_fmac_f32_e32 v0, v0, v1 ; encoding: [0x00,0x03,0x00,0x56]
|
||||||
; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
|
; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
|
||||||
|
;
|
||||||
|
; GFX1250-LABEL: v_fmac_f32:
|
||||||
|
; GFX1250: ; %bb.0:
|
||||||
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
|
||||||
|
; GFX1250-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
|
||||||
|
; GFX1250-NEXT: v_fmac_f32_e32 v0, v0, v1 ; encoding: [0x00,0x03,0x00,0x56]
|
||||||
|
; GFX1250-NEXT: s_set_pc_i64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
|
||||||
%fma = call float @llvm.fma.f32(float %x, float %y, float %x)
|
%fma = call float @llvm.fma.f32(float %x, float %y, float %x)
|
||||||
ret float %fma
|
ret float %fma
|
||||||
}
|
}
|
||||||
@ -368,6 +449,7 @@ define float @v_fmac_f32(float %x, float %y) {
|
|||||||
; GFX10: codeLenInByte = 12
|
; GFX10: codeLenInByte = 12
|
||||||
; GFX11: codeLenInByte = 12
|
; GFX11: codeLenInByte = 12
|
||||||
; GFX1200: codeLenInByte = 28
|
; GFX1200: codeLenInByte = 28
|
||||||
|
; GFX1250: codeLenInByte = 16
|
||||||
|
|
||||||
define float @v_fmaak_f32(float %x, float %y) {
|
define float @v_fmaak_f32(float %x, float %y) {
|
||||||
; GFX9-LABEL: v_fmaak_f32:
|
; GFX9-LABEL: v_fmaak_f32:
|
||||||
@ -398,6 +480,13 @@ define float @v_fmaak_f32(float %x, float %y) {
|
|||||||
; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
|
; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
|
||||||
; GFX1200-NEXT: v_fmaak_f32 v0, v0, v1, 0x43800000 ; encoding: [0x00,0x03,0x00,0x5a,0x00,0x00,0x80,0x43]
|
; GFX1200-NEXT: v_fmaak_f32 v0, v0, v1, 0x43800000 ; encoding: [0x00,0x03,0x00,0x5a,0x00,0x00,0x80,0x43]
|
||||||
; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
|
; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
|
||||||
|
;
|
||||||
|
; GFX1250-LABEL: v_fmaak_f32:
|
||||||
|
; GFX1250: ; %bb.0:
|
||||||
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
|
||||||
|
; GFX1250-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
|
||||||
|
; GFX1250-NEXT: v_fmaak_f32 v0, v0, v1, 0x43800000 ; encoding: [0x00,0x03,0x00,0x5a,0x00,0x00,0x80,0x43]
|
||||||
|
; GFX1250-NEXT: s_set_pc_i64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
|
||||||
%fma = call float @llvm.fma.f32(float %x, float %y, float 256.0)
|
%fma = call float @llvm.fma.f32(float %x, float %y, float 256.0)
|
||||||
ret float %fma
|
ret float %fma
|
||||||
}
|
}
|
||||||
@ -406,6 +495,7 @@ define float @v_fmaak_f32(float %x, float %y) {
|
|||||||
; GFX10: codeLenInByte = 16
|
; GFX10: codeLenInByte = 16
|
||||||
; GFX11: codeLenInByte = 16
|
; GFX11: codeLenInByte = 16
|
||||||
; GFX1200: codeLenInByte = 32
|
; GFX1200: codeLenInByte = 32
|
||||||
|
; GFX1250: codeLenInByte = 20
|
||||||
|
|
||||||
define float @v_fma_k_f32_src_mods(float %x, float %y) {
|
define float @v_fma_k_f32_src_mods(float %x, float %y) {
|
||||||
; GFX9-LABEL: v_fma_k_f32_src_mods:
|
; GFX9-LABEL: v_fma_k_f32_src_mods:
|
||||||
@ -436,6 +526,13 @@ define float @v_fma_k_f32_src_mods(float %x, float %y) {
|
|||||||
; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
|
; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
|
||||||
; GFX1200-NEXT: v_fma_f32 v0, |v0|, v1, 0x43800000 ; encoding: [0x00,0x01,0x13,0xd6,0x00,0x03,0xfe,0x03,0x00,0x00,0x80,0x43]
|
; GFX1200-NEXT: v_fma_f32 v0, |v0|, v1, 0x43800000 ; encoding: [0x00,0x01,0x13,0xd6,0x00,0x03,0xfe,0x03,0x00,0x00,0x80,0x43]
|
||||||
; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
|
; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
|
||||||
|
;
|
||||||
|
; GFX1250-LABEL: v_fma_k_f32_src_mods:
|
||||||
|
; GFX1250: ; %bb.0:
|
||||||
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
|
||||||
|
; GFX1250-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
|
||||||
|
; GFX1250-NEXT: v_fma_f32 v0, |v0|, v1, 0x43800000 ; encoding: [0x00,0x01,0x13,0xd6,0x00,0x03,0xfe,0x03,0x00,0x00,0x80,0x43]
|
||||||
|
; GFX1250-NEXT: s_set_pc_i64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
|
||||||
%fabs.x = call float @llvm.fabs.f32(float %x)
|
%fabs.x = call float @llvm.fabs.f32(float %x)
|
||||||
%fma = call float @llvm.fma.f32(float %fabs.x, float %y, float 256.0)
|
%fma = call float @llvm.fma.f32(float %fabs.x, float %y, float 256.0)
|
||||||
ret float %fma
|
ret float %fma
|
||||||
@ -445,6 +542,7 @@ define float @v_fma_k_f32_src_mods(float %x, float %y) {
|
|||||||
; GFX10: codeLenInByte = 20
|
; GFX10: codeLenInByte = 20
|
||||||
; GFX11: codeLenInByte = 20
|
; GFX11: codeLenInByte = 20
|
||||||
; GFX1200: codeLenInByte = 36
|
; GFX1200: codeLenInByte = 36
|
||||||
|
; GFX1250: codeLenInByte = 24
|
||||||
|
|
||||||
define amdgpu_ps float @s_fmaak_f32(float inreg %x, float inreg %y) {
|
define amdgpu_ps float @s_fmaak_f32(float inreg %x, float inreg %y) {
|
||||||
; GFX9-LABEL: s_fmaak_f32:
|
; GFX9-LABEL: s_fmaak_f32:
|
||||||
@ -480,6 +578,13 @@ define amdgpu_ps float @s_fmaak_f32(float inreg %x, float inreg %y) {
|
|||||||
; GFX1200-NEXT: s_delay_alu instid0(SALU_CYCLE_3) ; encoding: [0x0b,0x00,0x87,0xbf]
|
; GFX1200-NEXT: s_delay_alu instid0(SALU_CYCLE_3) ; encoding: [0x0b,0x00,0x87,0xbf]
|
||||||
; GFX1200-NEXT: v_mov_b32_e32 v0, s0 ; encoding: [0x00,0x02,0x00,0x7e]
|
; GFX1200-NEXT: v_mov_b32_e32 v0, s0 ; encoding: [0x00,0x02,0x00,0x7e]
|
||||||
; GFX1200-NEXT: ; return to shader part epilog
|
; GFX1200-NEXT: ; return to shader part epilog
|
||||||
|
;
|
||||||
|
; GFX1250-LABEL: s_fmaak_f32:
|
||||||
|
; GFX1250: ; %bb.0:
|
||||||
|
; GFX1250-NEXT: s_fmaak_f32 s0, s0, s1, 0x43800000 ; encoding: [0x00,0x01,0x80,0xa2,0x00,0x00,0x80,0x43]
|
||||||
|
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_3) ; encoding: [0x0b,0x00,0x87,0xbf]
|
||||||
|
; GFX1250-NEXT: v_mov_b32_e32 v0, s0 ; encoding: [0x00,0x02,0x00,0x7e]
|
||||||
|
; GFX1250-NEXT: ; return to shader part epilog
|
||||||
%fma = call float @llvm.fma.f32(float %x, float %y, float 256.0)
|
%fma = call float @llvm.fma.f32(float %x, float %y, float 256.0)
|
||||||
ret float %fma
|
ret float %fma
|
||||||
}
|
}
|
||||||
@ -489,3 +594,212 @@ define amdgpu_ps float @s_fmaak_f32(float inreg %x, float inreg %y) {
|
|||||||
; GFX1100: codeLenInByte = 16
|
; GFX1100: codeLenInByte = 16
|
||||||
; GFX1150: codeLenInByte = 16
|
; GFX1150: codeLenInByte = 16
|
||||||
; GFX1200: codeLenInByte = 16
|
; GFX1200: codeLenInByte = 16
|
||||||
|
; GFX1250: codeLenInByte = 16
|
||||||
|
|
||||||
|
define double @v_mul_f64_vop2_literal_32(double %x) {
|
||||||
|
; GFX9-LABEL: v_mul_f64_vop2_literal_32:
|
||||||
|
; GFX9: ; %bb.0:
|
||||||
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
|
||||||
|
; GFX9-NEXT: s_mov_b32 s4, 0 ; encoding: [0x80,0x00,0x84,0xbe]
|
||||||
|
; GFX9-NEXT: s_mov_b32 s5, 0x405ec000 ; encoding: [0xff,0x00,0x85,0xbe,0x00,0xc0,0x5e,0x40]
|
||||||
|
; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] ; encoding: [0x00,0x00,0x81,0xd2,0x00,0x09,0x00,0x00]
|
||||||
|
; GFX9-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
|
||||||
|
;
|
||||||
|
; GFX10-LABEL: v_mul_f64_vop2_literal_32:
|
||||||
|
; GFX10: ; %bb.0:
|
||||||
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
|
||||||
|
; GFX10-NEXT: v_mul_f64 v[0:1], 0x405ec000, v[0:1] ; encoding: [0x00,0x00,0x65,0xd5,0xff,0x00,0x02,0x00,0x00,0xc0,0x5e,0x40]
|
||||||
|
; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
|
||||||
|
;
|
||||||
|
; GFX11-LABEL: v_mul_f64_vop2_literal_32:
|
||||||
|
; GFX11: ; %bb.0:
|
||||||
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
|
||||||
|
; GFX11-NEXT: v_mul_f64 v[0:1], 0x405ec000, v[0:1] ; encoding: [0x00,0x00,0x28,0xd7,0xff,0x00,0x02,0x00,0x00,0xc0,0x5e,0x40]
|
||||||
|
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
|
||||||
|
;
|
||||||
|
; GFX1200-LABEL: v_mul_f64_vop2_literal_32:
|
||||||
|
; GFX1200: ; %bb.0:
|
||||||
|
; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
|
||||||
|
; GFX1200-NEXT: s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf]
|
||||||
|
; GFX1200-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
|
||||||
|
; GFX1200-NEXT: s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf]
|
||||||
|
; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
|
||||||
|
; GFX1200-NEXT: v_mul_f64_e32 v[0:1], 0x405ec000, v[0:1] ; encoding: [0xff,0x00,0x00,0x0c,0x00,0xc0,0x5e,0x40]
|
||||||
|
; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
|
||||||
|
;
|
||||||
|
; GFX1250-LABEL: v_mul_f64_vop2_literal_32:
|
||||||
|
; GFX1250: ; %bb.0:
|
||||||
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
|
||||||
|
; GFX1250-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
|
||||||
|
; GFX1250-NEXT: v_mul_f64_e32 v[0:1], 0x405ec000, v[0:1] ; encoding: [0xff,0x00,0x00,0x0c,0x00,0xc0,0x5e,0x40]
|
||||||
|
; GFX1250-NEXT: s_set_pc_i64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
|
||||||
|
%mul = fmul double %x, 123.0
|
||||||
|
ret double %mul
|
||||||
|
}
|
||||||
|
|
||||||
|
; GFX9: codeLenInByte = 28
|
||||||
|
; GFX10: codeLenInByte = 20
|
||||||
|
; GFX1100: codeLenInByte = 20
|
||||||
|
; GFX1150: codeLenInByte = 20
|
||||||
|
; GFX1250: codeLenInByte = 20
|
||||||
|
|
||||||
|
define double @v_mul_f64_vop2_literal_64(double %x) {
|
||||||
|
; GFX9-LABEL: v_mul_f64_vop2_literal_64:
|
||||||
|
; GFX9: ; %bb.0:
|
||||||
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
|
||||||
|
; GFX9-NEXT: s_mov_b32 s4, 0x66666666 ; encoding: [0xff,0x00,0x84,0xbe,0x66,0x66,0x66,0x66]
|
||||||
|
; GFX9-NEXT: s_mov_b32 s5, 0x405ec666 ; encoding: [0xff,0x00,0x85,0xbe,0x66,0xc6,0x5e,0x40]
|
||||||
|
; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] ; encoding: [0x00,0x00,0x81,0xd2,0x00,0x09,0x00,0x00]
|
||||||
|
; GFX9-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
|
||||||
|
;
|
||||||
|
; GFX10-LABEL: v_mul_f64_vop2_literal_64:
|
||||||
|
; GFX10: ; %bb.0:
|
||||||
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
|
||||||
|
; GFX10-NEXT: s_mov_b32 s4, 0x66666666 ; encoding: [0xff,0x03,0x84,0xbe,0x66,0x66,0x66,0x66]
|
||||||
|
; GFX10-NEXT: s_mov_b32 s5, 0x405ec666 ; encoding: [0xff,0x03,0x85,0xbe,0x66,0xc6,0x5e,0x40]
|
||||||
|
; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] ; encoding: [0x00,0x00,0x65,0xd5,0x00,0x09,0x00,0x00]
|
||||||
|
; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
|
||||||
|
;
|
||||||
|
; GFX11-LABEL: v_mul_f64_vop2_literal_64:
|
||||||
|
; GFX11: ; %bb.0:
|
||||||
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
|
||||||
|
; GFX11-NEXT: s_mov_b32 s0, 0x66666666 ; encoding: [0xff,0x00,0x80,0xbe,0x66,0x66,0x66,0x66]
|
||||||
|
; GFX11-NEXT: s_mov_b32 s1, 0x405ec666 ; encoding: [0xff,0x00,0x81,0xbe,0x66,0xc6,0x5e,0x40]
|
||||||
|
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; encoding: [0x09,0x00,0x87,0xbf]
|
||||||
|
; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] ; encoding: [0x00,0x00,0x28,0xd7,0x00,0x01,0x00,0x00]
|
||||||
|
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
|
||||||
|
;
|
||||||
|
; GFX1200-LABEL: v_mul_f64_vop2_literal_64:
|
||||||
|
; GFX1200: ; %bb.0:
|
||||||
|
; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
|
||||||
|
; GFX1200-NEXT: s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf]
|
||||||
|
; GFX1200-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
|
||||||
|
; GFX1200-NEXT: s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf]
|
||||||
|
; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
|
||||||
|
; GFX1200-NEXT: s_mov_b32 s0, 0x66666666 ; encoding: [0xff,0x00,0x80,0xbe,0x66,0x66,0x66,0x66]
|
||||||
|
; GFX1200-NEXT: s_mov_b32 s1, 0x405ec666 ; encoding: [0xff,0x00,0x81,0xbe,0x66,0xc6,0x5e,0x40]
|
||||||
|
; GFX1200-NEXT: s_wait_alu 0xfffe ; encoding: [0xfe,0xff,0x88,0xbf]
|
||||||
|
; GFX1200-NEXT: v_mul_f64_e32 v[0:1], s[0:1], v[0:1] ; encoding: [0x00,0x00,0x00,0x0c]
|
||||||
|
; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
|
||||||
|
;
|
||||||
|
; GFX1250-LABEL: v_mul_f64_vop2_literal_64:
|
||||||
|
; GFX1250: ; %bb.0:
|
||||||
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
|
||||||
|
; GFX1250-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
|
||||||
|
; GFX1250-NEXT: v_mul_f64_e32 v[0:1], lit64(0x405ec66666666666), v[0:1] ; encoding: [0xfe,0x00,0x00,0x0c,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40]
|
||||||
|
; GFX1250-NEXT: s_set_pc_i64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
|
||||||
|
%mul = fmul double %x, 123.1
|
||||||
|
ret double %mul
|
||||||
|
}
|
||||||
|
|
||||||
|
; GFX9: codeLenInByte = 32
|
||||||
|
; GFX10: codeLenInByte = 32
|
||||||
|
; GFX1100: codeLenInByte = 36
|
||||||
|
; GFX1150: codeLenInByte = 36
|
||||||
|
; GFX1250: codeLenInByte = 24
|
||||||
|
|
||||||
|
define i64 @v_add_u64_vop2_literal_32(i64 %x) {
|
||||||
|
; GFX9-LABEL: v_add_u64_vop2_literal_32:
|
||||||
|
; GFX9: ; %bb.0:
|
||||||
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
|
||||||
|
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x7b, v0 ; encoding: [0xff,0x00,0x00,0x32,0x7b,0x00,0x00,0x00]
|
||||||
|
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc ; encoding: [0x80,0x02,0x02,0x38]
|
||||||
|
; GFX9-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
|
||||||
|
;
|
||||||
|
; GFX10-LABEL: v_add_u64_vop2_literal_32:
|
||||||
|
; GFX10: ; %bb.0:
|
||||||
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
|
||||||
|
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x7b, v0 ; encoding: [0x00,0x6a,0x0f,0xd7,0xff,0x00,0x02,0x00,0x7b,0x00,0x00,0x00]
|
||||||
|
; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo ; encoding: [0x01,0x7d,0x28,0xd5,0x80,0x02,0xaa,0x01]
|
||||||
|
; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
|
||||||
|
;
|
||||||
|
; GFX11-LABEL: v_add_u64_vop2_literal_32:
|
||||||
|
; GFX11: ; %bb.0:
|
||||||
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
|
||||||
|
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x7b, v0 ; encoding: [0x00,0x6a,0x00,0xd7,0xff,0x00,0x02,0x00,0x7b,0x00,0x00,0x00]
|
||||||
|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; encoding: [0x01,0x00,0x87,0xbf]
|
||||||
|
; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo ; encoding: [0x01,0x7c,0x20,0xd5,0x80,0x02,0xaa,0x01]
|
||||||
|
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
|
||||||
|
;
|
||||||
|
; GFX1200-LABEL: v_add_u64_vop2_literal_32:
|
||||||
|
; GFX1200: ; %bb.0:
|
||||||
|
; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
|
||||||
|
; GFX1200-NEXT: s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf]
|
||||||
|
; GFX1200-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
|
||||||
|
; GFX1200-NEXT: s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf]
|
||||||
|
; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
|
||||||
|
; GFX1200-NEXT: v_add_co_u32 v0, vcc_lo, 0x7b, v0 ; encoding: [0x00,0x6a,0x00,0xd7,0xff,0x00,0x02,0x00,0x7b,0x00,0x00,0x00]
|
||||||
|
; GFX1200-NEXT: s_wait_alu 0xfffd ; encoding: [0xfd,0xff,0x88,0xbf]
|
||||||
|
; GFX1200-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo ; encoding: [0x01,0x7c,0x20,0xd5,0x80,0x02,0xaa,0x01]
|
||||||
|
; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
|
||||||
|
;
|
||||||
|
; GFX1250-LABEL: v_add_u64_vop2_literal_32:
|
||||||
|
; GFX1250: ; %bb.0:
|
||||||
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
|
||||||
|
; GFX1250-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
|
||||||
|
; GFX1250-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, 0x7b ; encoding: [0x00,0x00,0x52,0xd6,0x00,0x01,0xfd,0x03,0x7b,0x00,0x00,0x00]
|
||||||
|
; GFX1250-NEXT: s_set_pc_i64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
|
||||||
|
%add = add i64 %x, 123
|
||||||
|
ret i64 %add
|
||||||
|
}
|
||||||
|
|
||||||
|
; GFX9: codeLenInByte = 20
|
||||||
|
; GFX10: codeLenInByte = 28
|
||||||
|
; GFX1100: codeLenInByte = 32
|
||||||
|
; GFX1150: codeLenInByte = 32
|
||||||
|
; GFX1250: codeLenInByte = 24
|
||||||
|
|
||||||
|
define i64 @v_add_u64_vop2_literal_64(i64 %x) {
|
||||||
|
; GFX9-LABEL: v_add_u64_vop2_literal_64:
|
||||||
|
; GFX9: ; %bb.0:
|
||||||
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
|
||||||
|
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x12345678, v0 ; encoding: [0xff,0x00,0x00,0x32,0x78,0x56,0x34,0x12]
|
||||||
|
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc ; encoding: [0x81,0x02,0x02,0x38]
|
||||||
|
; GFX9-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
|
||||||
|
;
|
||||||
|
; GFX10-LABEL: v_add_u64_vop2_literal_64:
|
||||||
|
; GFX10: ; %bb.0:
|
||||||
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
|
||||||
|
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x12345678, v0 ; encoding: [0x00,0x6a,0x0f,0xd7,0xff,0x00,0x02,0x00,0x78,0x56,0x34,0x12]
|
||||||
|
; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo ; encoding: [0x01,0x7d,0x28,0xd5,0x81,0x02,0xaa,0x01]
|
||||||
|
; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
|
||||||
|
;
|
||||||
|
; GFX11-LABEL: v_add_u64_vop2_literal_64:
|
||||||
|
; GFX11: ; %bb.0:
|
||||||
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
|
||||||
|
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x12345678, v0 ; encoding: [0x00,0x6a,0x00,0xd7,0xff,0x00,0x02,0x00,0x78,0x56,0x34,0x12]
|
||||||
|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; encoding: [0x01,0x00,0x87,0xbf]
|
||||||
|
; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo ; encoding: [0x01,0x7c,0x20,0xd5,0x81,0x02,0xaa,0x01]
|
||||||
|
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
|
||||||
|
;
|
||||||
|
; GFX1200-LABEL: v_add_u64_vop2_literal_64:
|
||||||
|
; GFX1200: ; %bb.0:
|
||||||
|
; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
|
||||||
|
; GFX1200-NEXT: s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf]
|
||||||
|
; GFX1200-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
|
||||||
|
; GFX1200-NEXT: s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf]
|
||||||
|
; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
|
||||||
|
; GFX1200-NEXT: v_add_co_u32 v0, vcc_lo, 0x12345678, v0 ; encoding: [0x00,0x6a,0x00,0xd7,0xff,0x00,0x02,0x00,0x78,0x56,0x34,0x12]
|
||||||
|
; GFX1200-NEXT: s_wait_alu 0xfffd ; encoding: [0xfd,0xff,0x88,0xbf]
|
||||||
|
; GFX1200-NEXT: v_add_co_ci_u32_e64 v1, null, 1, v1, vcc_lo ; encoding: [0x01,0x7c,0x20,0xd5,0x81,0x02,0xaa,0x01]
|
||||||
|
; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
|
||||||
|
;
|
||||||
|
; GFX1250-LABEL: v_add_u64_vop2_literal_64:
|
||||||
|
; GFX1250: ; %bb.0:
|
||||||
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
|
||||||
|
; GFX1250-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
|
||||||
|
; GFX1250-NEXT: s_mov_b64 s[0:1], lit64(0x112345678) ; encoding: [0xfe,0x01,0x80,0xbe,0x78,0x56,0x34,0x12,0x01,0x00,0x00,0x00]
|
||||||
|
; GFX1250-NEXT: s_wait_alu 0xfffe ; encoding: [0xfe,0xff,0x88,0xbf]
|
||||||
|
; GFX1250-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1] ; encoding: [0x00,0x00,0x52,0xd6,0x00,0x01,0x01,0x00]
|
||||||
|
; GFX1250-NEXT: s_set_pc_i64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
|
||||||
|
%add = add i64 %x, 4600387192
|
||||||
|
ret i64 %add
|
||||||
|
}
|
||||||
|
|
||||||
|
; GFX9: codeLenInByte = 20
|
||||||
|
; GFX10: codeLenInByte = 28
|
||||||
|
; GFX1100: codeLenInByte = 32
|
||||||
|
; GFX1150: codeLenInByte = 32
|
||||||
|
; GFX1250: codeLenInByte = 36
|
||||||
|
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
|
||||||
|
; NOT-GFX12: {{.*}}
|
||||||
|
324
llvm/test/CodeGen/AMDGPU/literal64.ll
Normal file
324
llvm/test/CodeGen/AMDGPU/literal64.ll
Normal file
@ -0,0 +1,324 @@
|
|||||||
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
|
||||||
|
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GCN,GCN-SDAG %s
|
||||||
|
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GCN,GCN-GISEL %s
|
||||||
|
|
||||||
|
define amdgpu_ps i64 @s_add_u64(i64 inreg %a) {
|
||||||
|
; GCN-LABEL: s_add_u64:
|
||||||
|
; GCN: ; %bb.0:
|
||||||
|
; GCN-NEXT: s_add_nc_u64 s[0:1], s[0:1], lit64(0xf12345678)
|
||||||
|
; GCN-NEXT: ; return to shader part epilog
|
||||||
|
%result = add i64 %a, 64729929336
|
||||||
|
ret i64 %result
|
||||||
|
}
|
||||||
|
|
||||||
|
define amdgpu_ps void @v_add_u64(i64 %a, ptr addrspace(1) %out) {
|
||||||
|
; GCN-SDAG-LABEL: v_add_u64:
|
||||||
|
; GCN-SDAG: ; %bb.0:
|
||||||
|
; GCN-SDAG-NEXT: s_mov_b64 s[0:1], lit64(0xf12345678)
|
||||||
|
; GCN-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
||||||
|
; GCN-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
|
||||||
|
; GCN-SDAG-NEXT: global_store_b64 v[2:3], v[0:1], off
|
||||||
|
; GCN-SDAG-NEXT: s_endpgm
|
||||||
|
;
|
||||||
|
; GCN-GISEL-LABEL: v_add_u64:
|
||||||
|
; GCN-GISEL: ; %bb.0:
|
||||||
|
; GCN-GISEL-NEXT: v_mov_b64_e32 v[4:5], lit64(0xf12345678)
|
||||||
|
; GCN-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||||
|
; GCN-GISEL-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[4:5]
|
||||||
|
; GCN-GISEL-NEXT: global_store_b64 v[2:3], v[0:1], off
|
||||||
|
; GCN-GISEL-NEXT: s_endpgm
|
||||||
|
%result = add i64 %a, 64729929336
|
||||||
|
store i64 %result, ptr addrspace(1) %out, align 8
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define amdgpu_ps i64 @s_add_neg_u64(i64 inreg %a) {
|
||||||
|
; GCN-LABEL: s_add_neg_u64:
|
||||||
|
; GCN: ; %bb.0:
|
||||||
|
; GCN-NEXT: s_add_nc_u64 s[0:1], s[0:1], lit64(0xfffffff0edcba988)
|
||||||
|
; GCN-NEXT: ; return to shader part epilog
|
||||||
|
%result = sub i64 %a, 64729929336
|
||||||
|
ret i64 %result
|
||||||
|
}
|
||||||
|
|
||||||
|
define amdgpu_ps void @v_add_neg_u64(i64 %a, ptr addrspace(1) %out) {
|
||||||
|
; GCN-SDAG-LABEL: v_add_neg_u64:
|
||||||
|
; GCN-SDAG: ; %bb.0:
|
||||||
|
; GCN-SDAG-NEXT: s_mov_b64 s[0:1], lit64(0xfffffff0edcba988)
|
||||||
|
; GCN-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
||||||
|
; GCN-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
|
||||||
|
; GCN-SDAG-NEXT: global_store_b64 v[2:3], v[0:1], off
|
||||||
|
; GCN-SDAG-NEXT: s_endpgm
|
||||||
|
;
|
||||||
|
; GCN-GISEL-LABEL: v_add_neg_u64:
|
||||||
|
; GCN-GISEL: ; %bb.0:
|
||||||
|
; GCN-GISEL-NEXT: v_mov_b64_e32 v[4:5], lit64(0xfffffff0edcba988)
|
||||||
|
; GCN-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||||
|
; GCN-GISEL-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[4:5]
|
||||||
|
; GCN-GISEL-NEXT: global_store_b64 v[2:3], v[0:1], off
|
||||||
|
; GCN-GISEL-NEXT: s_endpgm
|
||||||
|
%result = sub i64 %a, 64729929336
|
||||||
|
store i64 %result, ptr addrspace(1) %out, align 8
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define amdgpu_ps i64 @s_sub_u64(i64 inreg %a) {
|
||||||
|
; GCN-LABEL: s_sub_u64:
|
||||||
|
; GCN: ; %bb.0:
|
||||||
|
; GCN-NEXT: s_sub_nc_u64 s[0:1], lit64(0xf12345678), s[0:1]
|
||||||
|
; GCN-NEXT: ; return to shader part epilog
|
||||||
|
%result = sub i64 64729929336, %a
|
||||||
|
ret i64 %result
|
||||||
|
}
|
||||||
|
|
||||||
|
define amdgpu_ps void @v_sub_u64(i64 %a, ptr addrspace(1) %out) {
|
||||||
|
; GCN-LABEL: v_sub_u64:
|
||||||
|
; GCN: ; %bb.0:
|
||||||
|
; GCN-NEXT: v_sub_co_u32 v0, vcc_lo, 0x12345678, v0
|
||||||
|
; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||||
|
; GCN-NEXT: v_sub_co_ci_u32_e64 v1, null, 15, v1, vcc_lo
|
||||||
|
; GCN-NEXT: global_store_b64 v[2:3], v[0:1], off
|
||||||
|
; GCN-NEXT: s_endpgm
|
||||||
|
%result = sub i64 64729929336, %a
|
||||||
|
store i64 %result, ptr addrspace(1) %out, align 8
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define void @v_mov_b64_double(ptr addrspace(1) %ptr) {
|
||||||
|
; GCN-LABEL: v_mov_b64_double:
|
||||||
|
; GCN: ; %bb.0:
|
||||||
|
; GCN-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||||
|
; GCN-NEXT: s_wait_kmcnt 0x0
|
||||||
|
; GCN-NEXT: global_load_b64 v[4:5], v[0:1], off
|
||||||
|
; GCN-NEXT: s_mov_b32 s0, 0
|
||||||
|
; GCN-NEXT: .LBB6_1: ; %atomicrmw.start
|
||||||
|
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||||
|
; GCN-NEXT: s_wait_loadcnt 0x0
|
||||||
|
; GCN-NEXT: v_add_f64_e32 v[2:3], lit64(0x4063233333333333), v[4:5]
|
||||||
|
; GCN-NEXT: global_atomic_cmpswap_b64 v[2:3], v[0:1], v[2:5], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
|
||||||
|
; GCN-NEXT: s_wait_loadcnt 0x0
|
||||||
|
; GCN-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[2:3], v[4:5]
|
||||||
|
; GCN-NEXT: s_wait_xcnt 0x0
|
||||||
|
; GCN-NEXT: v_mov_b64_e32 v[4:5], v[2:3]
|
||||||
|
; GCN-NEXT: s_wait_alu 0xfffe
|
||||||
|
; GCN-NEXT: s_or_b32 s0, vcc_lo, s0
|
||||||
|
; GCN-NEXT: s_wait_alu 0xfffe
|
||||||
|
; GCN-NEXT: s_and_not1_b32 exec_lo, exec_lo, s0
|
||||||
|
; GCN-NEXT: s_cbranch_execnz .LBB6_1
|
||||||
|
; GCN-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||||
|
; GCN-NEXT: s_or_b32 exec_lo, exec_lo, s0
|
||||||
|
; GCN-NEXT: s_set_pc_i64 s[30:31]
|
||||||
|
%result = atomicrmw fadd ptr addrspace(1) %ptr, double 153.1 monotonic
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define void @v_mov_b64_int(ptr addrspace(1) %ptr) {
|
||||||
|
; GCN-LABEL: v_mov_b64_int:
|
||||||
|
; GCN: ; %bb.0:
|
||||||
|
; GCN-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||||
|
; GCN-NEXT: s_wait_kmcnt 0x0
|
||||||
|
; GCN-NEXT: v_mov_b64_e32 v[2:3], lit64(0xf12345678)
|
||||||
|
; GCN-NEXT: global_atomic_add_u64 v[0:1], v[2:3], off scope:SCOPE_SYS
|
||||||
|
; GCN-NEXT: s_set_pc_i64 s[30:31]
|
||||||
|
%result = atomicrmw add ptr addrspace(1) %ptr, i64 64729929336 monotonic
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define void @store_double(ptr addrspace(1) %ptr) {
|
||||||
|
; GCN-LABEL: store_double:
|
||||||
|
; GCN: ; %bb.0:
|
||||||
|
; GCN-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||||
|
; GCN-NEXT: s_wait_kmcnt 0x0
|
||||||
|
; GCN-NEXT: v_mov_b64_e32 v[2:3], lit64(0x4063233333333333)
|
||||||
|
; GCN-NEXT: global_store_b64 v[0:1], v[2:3], off
|
||||||
|
; GCN-NEXT: s_set_pc_i64 s[30:31]
|
||||||
|
store double 153.1, ptr addrspace(1) %ptr
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define i1 @class_f64() noinline optnone {
|
||||||
|
; GCN-SDAG-LABEL: class_f64:
|
||||||
|
; GCN-SDAG: ; %bb.0:
|
||||||
|
; GCN-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||||
|
; GCN-SDAG-NEXT: s_wait_kmcnt 0x0
|
||||||
|
; GCN-SDAG-NEXT: s_mov_b32 s2, 1
|
||||||
|
; GCN-SDAG-NEXT: s_mov_b64 s[0:1], lit64(0x4063233333333333)
|
||||||
|
; GCN-SDAG-NEXT: s_wait_alu 0xfffe
|
||||||
|
; GCN-SDAG-NEXT: v_cmp_class_f64_e64 s0, s[0:1], s2
|
||||||
|
; GCN-SDAG-NEXT: s_wait_alu 0xf1ff
|
||||||
|
; GCN-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
|
||||||
|
; GCN-SDAG-NEXT: s_set_pc_i64 s[30:31]
|
||||||
|
;
|
||||||
|
; GCN-GISEL-LABEL: class_f64:
|
||||||
|
; GCN-GISEL: ; %bb.0:
|
||||||
|
; GCN-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||||
|
; GCN-GISEL-NEXT: s_wait_kmcnt 0x0
|
||||||
|
; GCN-GISEL-NEXT: s_mov_b32 s2, 1
|
||||||
|
; GCN-GISEL-NEXT: s_mov_b64 s[0:1], lit64(0x4063233333333333)
|
||||||
|
; GCN-GISEL-NEXT: s_wait_alu 0xfffe
|
||||||
|
; GCN-GISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
|
||||||
|
; GCN-GISEL-NEXT: v_mov_b32_e32 v2, s2
|
||||||
|
; GCN-GISEL-NEXT: v_cmp_class_f64_e64 s0, v[0:1], v2
|
||||||
|
; GCN-GISEL-NEXT: v_mov_b32_e32 v0, 1
|
||||||
|
; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0
|
||||||
|
; GCN-GISEL-NEXT: s_wait_alu 0xf1ff
|
||||||
|
; GCN-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, v0, s0
|
||||||
|
; GCN-GISEL-NEXT: s_set_pc_i64 s[30:31]
|
||||||
|
%result = call i1 @llvm.amdgcn.class.f64(double 153.1, i32 1) nounwind readnone
|
||||||
|
ret i1 %result
|
||||||
|
}
|
||||||
|
|
||||||
|
define double @rsq_f64() {
|
||||||
|
; GCN-LABEL: rsq_f64:
|
||||||
|
; GCN: ; %bb.0:
|
||||||
|
; GCN-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||||
|
; GCN-NEXT: s_wait_kmcnt 0x0
|
||||||
|
; GCN-NEXT: v_rsq_f64_e32 v[0:1], lit64(0x4063233333333333)
|
||||||
|
; GCN-NEXT: s_set_pc_i64 s[30:31]
|
||||||
|
%result = call double @llvm.amdgcn.rsq.f64(double 153.1) nounwind readnone
|
||||||
|
ret double %result
|
||||||
|
}
|
||||||
|
|
||||||
|
define amdgpu_ps i64 @s_and_b64(i64 inreg %a) {
|
||||||
|
; GCN-LABEL: s_and_b64:
|
||||||
|
; GCN: ; %bb.0:
|
||||||
|
; GCN-NEXT: s_and_b64 s[0:1], s[0:1], lit64(0xf12345678)
|
||||||
|
; GCN-NEXT: ; return to shader part epilog
|
||||||
|
%result = and i64 %a, 64729929336
|
||||||
|
ret i64 %result
|
||||||
|
}
|
||||||
|
|
||||||
|
; No V_AND_B64 instruction, it has to be split
|
||||||
|
|
||||||
|
define amdgpu_ps void @v_and_b64(i64 %a, ptr addrspace(1) %out) {
|
||||||
|
; GCN-SDAG-LABEL: v_and_b64:
|
||||||
|
; GCN-SDAG: ; %bb.0:
|
||||||
|
; GCN-SDAG-NEXT: v_and_b32_e32 v1, 15, v1
|
||||||
|
; GCN-SDAG-NEXT: v_and_b32_e32 v0, 0x12345678, v0
|
||||||
|
; GCN-SDAG-NEXT: global_store_b64 v[2:3], v[0:1], off
|
||||||
|
; GCN-SDAG-NEXT: s_endpgm
|
||||||
|
;
|
||||||
|
; GCN-GISEL-LABEL: v_and_b64:
|
||||||
|
; GCN-GISEL: ; %bb.0:
|
||||||
|
; GCN-GISEL-NEXT: v_and_b32_e32 v0, 0x12345678, v0
|
||||||
|
; GCN-GISEL-NEXT: v_and_b32_e32 v1, 15, v1
|
||||||
|
; GCN-GISEL-NEXT: global_store_b64 v[2:3], v[0:1], off
|
||||||
|
; GCN-GISEL-NEXT: s_endpgm
|
||||||
|
%result = and i64 %a, 64729929336
|
||||||
|
store i64 %result, ptr addrspace(1) %out, align 8
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define amdgpu_ps <2 x float> @v_add_f64_200.1(double %a) {
|
||||||
|
; GCN-LABEL: v_add_f64_200.1:
|
||||||
|
; GCN: ; %bb.0:
|
||||||
|
; GCN-NEXT: v_add_f64_e32 v[0:1], lit64(0x4069033333333333), v[0:1]
|
||||||
|
; GCN-NEXT: ; return to shader part epilog
|
||||||
|
%add = fadd double %a, 200.1
|
||||||
|
%ret = bitcast double %add to <2 x float>
|
||||||
|
ret <2 x float> %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
; 200.0 can be encoded as 32-bit literal
|
||||||
|
|
||||||
|
define amdgpu_ps <2 x float> @v_add_f64_200.0(double %a) {
|
||||||
|
; GCN-LABEL: v_add_f64_200.0:
|
||||||
|
; GCN: ; %bb.0:
|
||||||
|
; GCN-NEXT: v_add_f64_e32 v[0:1], 0x40690000, v[0:1]
|
||||||
|
; GCN-NEXT: ; return to shader part epilog
|
||||||
|
%add = fadd double %a, 200.0
|
||||||
|
%ret = bitcast double %add to <2 x float>
|
||||||
|
ret <2 x float> %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
; No folding into VOP3
|
||||||
|
|
||||||
|
define amdgpu_ps <2 x float> @v_lshl_add_u64(i64 %a) {
|
||||||
|
; GCN-SDAG-LABEL: v_lshl_add_u64:
|
||||||
|
; GCN-SDAG: ; %bb.0:
|
||||||
|
; GCN-SDAG-NEXT: s_mov_b64 s[0:1], lit64(0xf12345678)
|
||||||
|
; GCN-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
||||||
|
; GCN-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 1, s[0:1]
|
||||||
|
; GCN-SDAG-NEXT: ; return to shader part epilog
|
||||||
|
;
|
||||||
|
; GCN-GISEL-LABEL: v_lshl_add_u64:
|
||||||
|
; GCN-GISEL: ; %bb.0:
|
||||||
|
; GCN-GISEL-NEXT: v_mov_b64_e32 v[2:3], lit64(0xf12345678)
|
||||||
|
; GCN-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||||
|
; GCN-GISEL-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 1, v[2:3]
|
||||||
|
; GCN-GISEL-NEXT: ; return to shader part epilog
|
||||||
|
%shl = shl i64 %a, 1
|
||||||
|
%add = add i64 %shl, 64729929336
|
||||||
|
%ret = bitcast i64 %add to <2 x float>
|
||||||
|
ret <2 x float> %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
; No folding into VOP2 promoted to VOP3
|
||||||
|
|
||||||
|
define amdgpu_ps <2 x float> @v_fma_f64(double %a, double %b) {
|
||||||
|
; GCN-LABEL: v_fma_f64:
|
||||||
|
; GCN: ; %bb.0:
|
||||||
|
; GCN-NEXT: v_mov_b64_e32 v[4:5], lit64(0x4063233333333333)
|
||||||
|
; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
|
||||||
|
; GCN-NEXT: v_fmac_f64_e32 v[4:5], v[0:1], v[2:3]
|
||||||
|
; GCN-NEXT: v_mov_b64_e32 v[2:3], lit64(0x4069033333333333)
|
||||||
|
; GCN-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], v[2:3]
|
||||||
|
; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||||
|
; GCN-NEXT: v_fmac_f64_e32 v[2:3], v[0:1], v[4:5]
|
||||||
|
; GCN-NEXT: v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3
|
||||||
|
; GCN-NEXT: ; return to shader part epilog
|
||||||
|
%r1 = call double @llvm.fma.f64(double %a, double %b, double 153.1) nounwind readnone
|
||||||
|
%r2 = call double @llvm.fma.f64(double %a, double %r1, double 200.1) nounwind readnone
|
||||||
|
%r3 = call double @llvm.fma.f64(double %r2, double %r1, double 200.1) nounwind readnone
|
||||||
|
%ret = bitcast double %r3 to <2 x float>
|
||||||
|
ret <2 x float> %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
define amdgpu_ps <2 x float> @v_add_neg_f64(double %a) {
|
||||||
|
; GCN-SDAG-LABEL: v_add_neg_f64:
|
||||||
|
; GCN-SDAG: ; %bb.0:
|
||||||
|
; GCN-SDAG-NEXT: s_mov_b64 s[0:1], lit64(0x4069033333333333)
|
||||||
|
; GCN-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
||||||
|
; GCN-SDAG-NEXT: v_add_f64_e64 v[0:1], -v[0:1], s[0:1]
|
||||||
|
; GCN-SDAG-NEXT: ; return to shader part epilog
|
||||||
|
;
|
||||||
|
; GCN-GISEL-LABEL: v_add_neg_f64:
|
||||||
|
; GCN-GISEL: ; %bb.0:
|
||||||
|
; GCN-GISEL-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
|
||||||
|
; GCN-GISEL-NEXT: v_mov_b64_e32 v[2:3], lit64(0x4069033333333333)
|
||||||
|
; GCN-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||||
|
; GCN-GISEL-NEXT: v_add_f64_e64 v[0:1], -v[0:1], v[2:3]
|
||||||
|
; GCN-GISEL-NEXT: ; return to shader part epilog
|
||||||
|
%fneg = fsub double -0.0, %a
|
||||||
|
%add = fadd double %fneg, 200.1
|
||||||
|
%ret = bitcast double %add to <2 x float>
|
||||||
|
ret <2 x float> %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
define amdgpu_ps <2 x float> @v_cndmask(double %a) {
|
||||||
|
; GCN-SDAG-LABEL: v_cndmask:
|
||||||
|
; GCN-SDAG: ; %bb.0:
|
||||||
|
; GCN-SDAG-NEXT: v_cmp_eq_f64_e32 vcc_lo, 0, v[0:1]
|
||||||
|
; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x40632000
|
||||||
|
; GCN-SDAG-NEXT: v_cndmask_b32_e64 v0, 0x33333333, 0, vcc_lo
|
||||||
|
; GCN-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
||||||
|
; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x40690333, v1, vcc_lo
|
||||||
|
; GCN-SDAG-NEXT: ; return to shader part epilog
|
||||||
|
;
|
||||||
|
; GCN-GISEL-LABEL: v_cndmask:
|
||||||
|
; GCN-GISEL: ; %bb.0:
|
||||||
|
; GCN-GISEL-NEXT: v_cmp_eq_f64_e32 vcc_lo, 0, v[0:1]
|
||||||
|
; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x40690333
|
||||||
|
; GCN-GISEL-NEXT: v_cndmask_b32_e64 v0, 0x33333333, 0, vcc_lo
|
||||||
|
; GCN-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
||||||
|
; GCN-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0x40632000, vcc_lo
|
||||||
|
; GCN-GISEL-NEXT: ; return to shader part epilog
|
||||||
|
%cmp = fcmp oeq double %a, 0.0
|
||||||
|
%sel = select i1 %cmp, double 153.0, double 200.1
|
||||||
|
%ret = bitcast double %sel to <2 x float>
|
||||||
|
ret <2 x float> %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
declare i1 @llvm.amdgcn.class.f64(double, i32) nounwind readnone
|
||||||
|
declare double @llvm.amdgcn.rsq.f64(double) nounwind readnone
|
||||||
|
declare double @llvm.fma.f64(double, double, double) nounwind readnone
|
Loading…
x
Reference in New Issue
Block a user