[AMDGPU] Per-subtarget DPP instruction classification (#153096)
This is NFCI (no functional change intended) at this point.
This commit is contained in:
parent
b9ecee9d47
commit
ea14834966
@ -5653,7 +5653,7 @@ bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper,
|
||||
unsigned SplitSize = 32;
|
||||
if (IID == Intrinsic::amdgcn_update_dpp && (Size % 64 == 0) &&
|
||||
ST.hasDPALU_DPP() &&
|
||||
AMDGPU::isLegalDPALU_DPPControl(MI.getOperand(4).getImm()))
|
||||
AMDGPU::isLegalDPALU_DPPControl(ST, MI.getOperand(4).getImm()))
|
||||
SplitSize = 64;
|
||||
|
||||
if (Size == SplitSize) {
|
||||
|
@ -5052,11 +5052,13 @@ bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
|
||||
if (DppCtrlIdx >= 0) {
|
||||
unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
|
||||
|
||||
if (!AMDGPU::isLegalDPALU_DPPControl(DppCtrl) &&
|
||||
AMDGPU::isDPALU_DPP(MII.get(Opc))) {
|
||||
// DP ALU DPP is supported for row_newbcast only on GFX9*
|
||||
if (!AMDGPU::isLegalDPALU_DPPControl(getSTI(), DppCtrl) &&
|
||||
AMDGPU::isDPALU_DPP(MII.get(Opc), getSTI())) {
|
||||
// DP ALU DPP is supported for row_newbcast only on GFX9* and row_share
|
||||
// only on GFX12.
|
||||
SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
|
||||
Error(S, "DP ALU dpp only supports row_newbcast");
|
||||
Error(S, isGFX12() ? "DP ALU dpp only supports row_share"
|
||||
: "DP ALU dpp only supports row_newbcast");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
@ -549,11 +549,17 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO ||
|
||||
MovMI.getOpcode() == AMDGPU::V_MOV_B64_dpp) {
|
||||
auto *DppCtrl = TII->getNamedOperand(MovMI, AMDGPU::OpName::dpp_ctrl);
|
||||
assert(DppCtrl && DppCtrl->isImm());
|
||||
if (!AMDGPU::isLegalDPALU_DPPControl(DppCtrl->getImm())) {
|
||||
auto *DppCtrl = TII->getNamedOperand(MovMI, AMDGPU::OpName::dpp_ctrl);
|
||||
assert(DppCtrl && DppCtrl->isImm());
|
||||
unsigned DppCtrlVal = DppCtrl->getImm();
|
||||
if ((MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO ||
|
||||
MovMI.getOpcode() == AMDGPU::V_MOV_B64_dpp)) {
|
||||
if (!ST->hasFeature(AMDGPU::FeatureDPALU_DPP)) {
|
||||
LLVM_DEBUG(dbgs() << " failed: 64 bit dpp move is unsupported\n");
|
||||
// Split it.
|
||||
return false;
|
||||
}
|
||||
if (!AMDGPU::isLegalDPALU_DPPControl(*ST, DppCtrlVal)) {
|
||||
LLVM_DEBUG(dbgs() << " failed: 64 bit dpp move uses unsupported"
|
||||
" control value\n");
|
||||
// Let it split, then control may become legal.
|
||||
@ -709,6 +715,20 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
|
||||
break;
|
||||
}
|
||||
|
||||
// Give up on this use when the subtarget lacks the DP ALU DPP feature and the
// candidate opcode is one of the 32-bit DP ALU DPP opcodes.
if (!ST->hasFeature(AMDGPU::FeatureDPALU_DPP) &&
    AMDGPU::isDPALU_DPP32BitOpc(OrigOp)) {
  LLVM_DEBUG(dbgs() << " " << OrigMI
                    << " failed: DP ALU DPP is not supported\n");
  break;
}
|
||||
|
||||
if (!AMDGPU::isLegalDPALU_DPPControl(*ST, DppCtrlVal) &&
|
||||
AMDGPU::isDPALU_DPP(TII->get(OrigOp), *ST)) {
|
||||
LLVM_DEBUG(dbgs() << " " << OrigMI
|
||||
<< " failed: not valid 64-bit DPP control value\n");
|
||||
break;
|
||||
}
|
||||
|
||||
LLVM_DEBUG(dbgs() << " combining: " << OrigMI);
|
||||
if (Use == Src0) {
|
||||
if (auto *DPPInst = createDPPInst(OrigMI, MovMI, CombOldVGPR,
|
||||
|
@ -976,8 +976,10 @@ void AMDGPUInstPrinter::printDPPCtrl(const MCInst *MI, unsigned OpNo,
|
||||
unsigned Imm = MI->getOperand(OpNo).getImm();
|
||||
const MCInstrDesc &Desc = MII.get(MI->getOpcode());
|
||||
|
||||
if (!AMDGPU::isLegalDPALU_DPPControl(Imm) && AMDGPU::isDPALU_DPP(Desc)) {
|
||||
O << " /* DP ALU dpp only supports row_newbcast */";
|
||||
if (!AMDGPU::isLegalDPALU_DPPControl(STI, Imm) &&
|
||||
AMDGPU::isDPALU_DPP(Desc, STI)) {
|
||||
O << " /* DP ALU dpp only supports "
|
||||
<< (isGFX12(STI) ? "row_share" : "row_newbcast") << " */";
|
||||
return;
|
||||
}
|
||||
if (Imm <= DppCtrl::QUAD_PERM_LAST) {
|
||||
|
@ -6621,7 +6621,7 @@ static SDValue lowerLaneOp(const SITargetLowering &TLI, SDNode *N,
|
||||
unsigned SplitSize = 32;
|
||||
if (IID == Intrinsic::amdgcn_update_dpp && (ValSize % 64 == 0) &&
|
||||
ST->hasDPALU_DPP() &&
|
||||
AMDGPU::isLegalDPALU_DPPControl(N->getConstantOperandVal(3)))
|
||||
AMDGPU::isLegalDPALU_DPPControl(*ST, N->getConstantOperandVal(3)))
|
||||
SplitSize = 64;
|
||||
|
||||
auto createLaneOp = [&DAG, &SL, N, IID](SDValue Src0, SDValue Src1,
|
||||
|
@ -2616,9 +2616,9 @@ std::pair<MachineInstr*, MachineInstr*>
|
||||
SIInstrInfo::expandMovDPP64(MachineInstr &MI) const {
|
||||
assert (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);
|
||||
|
||||
if (ST.hasMovB64() &&
|
||||
if (ST.hasMovB64() && ST.hasFeature(AMDGPU::FeatureDPALU_DPP) &&
|
||||
AMDGPU::isLegalDPALU_DPPControl(
|
||||
getNamedOperand(MI, AMDGPU::OpName::dpp_ctrl)->getImm())) {
|
||||
ST, getNamedOperand(MI, AMDGPU::OpName::dpp_ctrl)->getImm())) {
|
||||
MI.setDesc(get(AMDGPU::V_MOV_B64_dpp));
|
||||
return std::pair(&MI, nullptr);
|
||||
}
|
||||
@ -5433,7 +5433,8 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
|
||||
}
|
||||
|
||||
if (Opcode != AMDGPU::V_MOV_B64_DPP_PSEUDO &&
|
||||
!AMDGPU::isLegalDPALU_DPPControl(DC) && AMDGPU::isDPALU_DPP(Desc)) {
|
||||
!AMDGPU::isLegalDPALU_DPPControl(ST, DC) &&
|
||||
AMDGPU::isDPALU_DPP(Desc, ST)) {
|
||||
// Name the control family the subtarget actually accepts, matching
// AMDGPU::isLegalDPALU_DPPControl: row_share on GFX12, row_newbcast otherwise.
// The non-GFX12 text is unchanged so existing diagnostics stay the same.
ErrInfo = AMDGPU::isGFX12(ST)
              ? "Invalid dpp_ctrl value: "
                "DP ALU dpp only support row_share"
              : "Invalid dpp_ctrl value: "
                "DP ALU dpp only support row_newbcast";
|
||||
return false;
|
||||
|
@ -1954,6 +1954,7 @@ class getVOP3DPPSrcForVT<ValueType VT, bit IsFake16 = 1> {
|
||||
!eq(VT, v2f16) : VCSrc_v2f16,
|
||||
!eq(VT, v2bf16) : VCSrc_v2bf16,
|
||||
!eq(VT, f32) : VCSrc_f32,
|
||||
!eq(VT, f64) : VCSrc_f64,
|
||||
!eq(VT, v2i32) : VCSrc_v2b32,
|
||||
1 : VCSrc_b32);
|
||||
}
|
||||
|
@ -3309,7 +3309,33 @@ bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc) {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool isDPALU_DPP(const MCInstrDesc &OpDesc) {
|
||||
bool isDPALU_DPP32BitOpc(unsigned Opc) {
|
||||
switch (Opc) {
|
||||
case AMDGPU::V_MUL_LO_U32_e64:
|
||||
case AMDGPU::V_MUL_LO_U32_e64_dpp:
|
||||
case AMDGPU::V_MUL_LO_U32_e64_dpp_gfx1250:
|
||||
case AMDGPU::V_MUL_HI_U32_e64:
|
||||
case AMDGPU::V_MUL_HI_U32_e64_dpp:
|
||||
case AMDGPU::V_MUL_HI_U32_e64_dpp_gfx1250:
|
||||
case AMDGPU::V_MUL_HI_I32_e64:
|
||||
case AMDGPU::V_MUL_HI_I32_e64_dpp:
|
||||
case AMDGPU::V_MUL_HI_I32_e64_dpp_gfx1250:
|
||||
case AMDGPU::V_MAD_U32_e64:
|
||||
case AMDGPU::V_MAD_U32_e64_dpp:
|
||||
case AMDGPU::V_MAD_U32_e64_dpp_gfx1250:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCSubtargetInfo &ST) {
|
||||
if (!ST.hasFeature(AMDGPU::FeatureDPALU_DPP))
|
||||
return false;
|
||||
|
||||
if (isDPALU_DPP32BitOpc(OpDesc.getOpcode()))
|
||||
return ST.hasFeature(AMDGPU::FeatureGFX1250Insts);
|
||||
|
||||
return hasAny64BitVGPROperands(OpDesc);
|
||||
}
|
||||
|
||||
|
@ -1750,15 +1750,22 @@ unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST);
|
||||
bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
|
||||
|
||||
LLVM_READNONE
|
||||
inline bool isLegalDPALU_DPPControl(unsigned DC) {
|
||||
return DC >= DPP::ROW_NEWBCAST_FIRST && DC <= DPP::ROW_NEWBCAST_LAST;
|
||||
inline bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC) {
|
||||
if (isGFX12(ST))
|
||||
return DC >= DPP::ROW_SHARE_FIRST && DC <= DPP::ROW_SHARE_LAST;
|
||||
if (isGFX90A(ST))
|
||||
return DC >= DPP::ROW_NEWBCAST_FIRST && DC <= DPP::ROW_NEWBCAST_LAST;
|
||||
return false;
|
||||
}
|
||||
|
||||
/// \returns true if an instruction may have a 64-bit VGPR operand.
|
||||
bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc);
|
||||
|
||||
/// \returns true if an instruction is a DP ALU DPP without any 64-bit operands.
|
||||
bool isDPALU_DPP32BitOpc(unsigned Opc);
|
||||
|
||||
/// \returns true if an instruction is a DP ALU DPP.
|
||||
bool isDPALU_DPP(const MCInstrDesc &OpDesc);
|
||||
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCSubtargetInfo &ST);
|
||||
|
||||
/// \returns true if the intrinsic is divergent
|
||||
bool isIntrinsicSourceOfDivergence(unsigned IntrID);
|
||||
|
@ -2084,6 +2084,9 @@ multiclass VOP3_Realtriple_gfx11_gfx12<bits<10> op> :
|
||||
multiclass VOP3_Real_Base_gfx11_gfx12<bits<10> op> :
|
||||
VOP3_Real_Base<GFX11Gen, op>, VOP3_Real_Base<GFX12Gen, op>;
|
||||
|
||||
// Instantiates VOP3_Real_Base for opcode `op` twice: once for GFX11Gen and
// once for GFX12Not12_50Gen (presumably GFX12 excluding GFX1250 -- confirm
// against the Gen definition).
multiclass VOP3_Real_Base_gfx11_gfx12_not_gfx1250<bits<10> op> :
|
||||
VOP3_Real_Base<GFX11Gen, op>, VOP3_Real_Base<GFX12Not12_50Gen, op>;
|
||||
|
||||
multiclass VOP3_Realtriple_with_name_gfx11_gfx12<bits<10> op, string opName,
|
||||
string asmName> :
|
||||
VOP3_Realtriple_with_name<GFX11Gen, op, opName, asmName>,
|
||||
@ -2211,9 +2214,9 @@ defm V_MUL_F64 : VOP3_Real_Base_gfx11<0x328>;
|
||||
defm V_MIN_F64 : VOP3_Real_Base_gfx11<0x329>;
|
||||
defm V_MAX_F64 : VOP3_Real_Base_gfx11<0x32a>;
|
||||
defm V_LDEXP_F64 : VOP3_Real_Base_gfx11_gfx12<0x32b>;
|
||||
defm V_MUL_LO_U32 : VOP3_Real_Base_gfx11_gfx12<0x32c>;
|
||||
defm V_MUL_HI_U32 : VOP3_Real_Base_gfx11_gfx12<0x32d>;
|
||||
defm V_MUL_HI_I32 : VOP3_Real_Base_gfx11_gfx12<0x32e>;
|
||||
defm V_MUL_LO_U32 : VOP3_Real_Base_gfx11_gfx12_not_gfx1250<0x32c>;
|
||||
defm V_MUL_HI_U32 : VOP3_Real_Base_gfx11_gfx12_not_gfx1250<0x32d>;
|
||||
defm V_MUL_HI_I32 : VOP3_Real_Base_gfx11_gfx12_not_gfx1250<0x32e>;
|
||||
defm V_TRIG_PREOP_F64 : VOP3_Real_Base_gfx11_gfx12<0x32f>;
|
||||
defm V_LSHLREV_B16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x338, "v_lshlrev_b16">;
|
||||
defm V_LSHRREV_B16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x339, "v_lshrrev_b16">;
|
||||
@ -2242,6 +2245,10 @@ let AssemblerPredicate = isGFX11Plus in {
|
||||
}
|
||||
|
||||
// These instructions differ from GFX12 variant by supporting DPP:
|
||||
defm V_MUL_LO_U32 : VOP3Only_Realtriple_gfx1250<0x32c>;
|
||||
defm V_MUL_HI_U32 : VOP3Only_Realtriple_gfx1250<0x32d>;
|
||||
defm V_MUL_HI_I32 : VOP3Only_Realtriple_gfx1250<0x32e>;
|
||||
|
||||
defm V_PERM_PK16_B4_U4 : VOP3Only_Real_Base_gfx1250<0x23f>;
|
||||
defm V_PERM_PK16_B6_U4 : VOP3Only_Real_Base_gfx1250<0x242>;
|
||||
defm V_PERM_PK16_B8_U4 : VOP3Only_Real_Base_gfx1250<0x243>;
|
||||
|
@ -1,12 +1,13 @@
|
||||
; RUN: llc -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck %s -check-prefixes=GCN,DPP64,GFX90A
|
||||
; RUN: llc -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck %s -check-prefixes=GCN,DPP64,DPPMOV64,GFX942
|
||||
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck %s -check-prefixes=GCN,DPP32,GFX10PLUS,GFX10
|
||||
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck %s -check-prefixes=GCN,DPP32,GFX10PLUS,GFX11
|
||||
; RUN: llc -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck %s -check-prefixes=GCN,DPP64,GFX90A,DPP64-GFX9 -DCTL=row_newbcast
|
||||
; RUN: llc -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck %s -check-prefixes=GCN,DPP64,DPPMOV64,DPP64-GFX9,GFX942 -DCTL=row_newbcast
|
||||
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck %s -check-prefixes=GCN,DPP32,GFX10PLUS,GFX10 -DCTL=row_share
|
||||
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck %s -check-prefixes=GCN,DPP32,GFX10PLUS,GFX11 -DCTL=row_share
|
||||
; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck %s -check-prefixes=GCN,DPP32,GFX1250 -DCTL=row_share
|
||||
|
||||
; GCN-LABEL: {{^}}dpp64_ceil:
|
||||
; GCN: global_load_{{dwordx2|b64}} [[V:v\[[0-9:]+\]]],
|
||||
; DPP64: v_ceil_f64_dpp [[V]], [[V]] row_newbcast:1 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
|
||||
; DPP32-COUNT-2: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} row_share:1 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
|
||||
; DPP64: v_ceil_f64_dpp [[V]], [[V]] [[CTL]]:1 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
|
||||
; DPP32-COUNT-2: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} [[CTL]]:1 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
|
||||
define amdgpu_kernel void @dpp64_ceil(ptr addrspace(1) %arg, i64 %in1) {
|
||||
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%gep = getelementptr inbounds i64, ptr addrspace(1) %arg, i32 %id
|
||||
@ -21,8 +22,8 @@ define amdgpu_kernel void @dpp64_ceil(ptr addrspace(1) %arg, i64 %in1) {
|
||||
|
||||
; GCN-LABEL: {{^}}dpp64_rcp:
|
||||
; GCN: global_load_{{dwordx2|b64}} [[V:v\[[0-9:]+\]]],
|
||||
; DPP64: v_rcp_f64_dpp [[V]], [[V]] row_newbcast:1 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
|
||||
; DPP32-COUNT-2: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} row_share:1 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
|
||||
; DPP64-GFX9: v_rcp_f64_dpp [[V]], [[V]] [[CTL]]:1 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
|
||||
; DPP32-COUNT-2: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} [[CTL]]:1 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
|
||||
define amdgpu_kernel void @dpp64_rcp(ptr addrspace(1) %arg, i64 %in1) {
|
||||
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%gep = getelementptr inbounds i64, ptr addrspace(1) %arg, i32 %id
|
||||
@ -52,9 +53,9 @@ define amdgpu_kernel void @dpp64_rcp_unsupported_ctl(ptr addrspace(1) %arg, i64
|
||||
|
||||
; GCN-LABEL: {{^}}dpp64_div:
|
||||
; GCN: global_load_{{dwordx2|b64}} [[V:v\[[0-9:]+\]]],
|
||||
; DPPMOV64: v_mov_b64_dpp v[{{[0-9:]+}}], [[V]] row_newbcast:1 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
|
||||
; GFX90A-COUNT-2: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} row_newbcast:1 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
|
||||
; GFX10PLUS-COUNT-2: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} row_share:1 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
|
||||
; DPPMOV64: v_mov_b64_dpp v[{{[0-9:]+}}], [[V]] [[CTL]]:1 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
|
||||
; GFX90A-COUNT-2: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} [[CTL]]:1 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
|
||||
; DPP32-COUNT-2: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} [[CTL]]:1 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
|
||||
; GCN: v_div_scale_f64
|
||||
; GCN: v_rcp_f64_e32
|
||||
define amdgpu_kernel void @dpp64_div(ptr addrspace(1) %arg, i64 %in1) {
|
||||
@ -69,6 +70,25 @@ define amdgpu_kernel void @dpp64_div(ptr addrspace(1) %arg, i64 %in1) {
|
||||
ret void
|
||||
}
|
||||
|
||||
; On GFX9 it fails to combine because v_mul_lo_u32 has no e32 or dpp form.
|
||||
; GCN-LABEL: {{^}}dpp_mul_row_share:
|
||||
; GCN: global_load_{{dword|b32}} [[V:v[0-9]+]],
|
||||
; DPP64-GFX9: v_mov_b32_e32 [[V2:v[0-9]+]], [[V]]
|
||||
; DPP64-GFX9: v_mov_b32_dpp [[V2]], [[V2]] {{row_share|row_newbcast}}:0 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
|
||||
; DPP64-GFX9: v_mul_lo_u32 [[V]], [[V2]], [[V]]{{$}}
|
||||
; GFX1250: v_mov_b32_e32 [[V2:v[0-9]+]], [[V]]
|
||||
; GFX1250: v_mov_b32_dpp [[V2]], [[V2]] {{row_share|row_newbcast}}:0 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
|
||||
; GFX1250: v_mul_lo_u32 [[V]], [[V2]], [[V]]{{$}}
|
||||
; Loads one i32 per work-item, applies update.dpp with control value 336 to it,
; multiplies the DPP result by the loaded value and stores the product back.
; NOTE(review): 336 (0x150) is expected to print as row_share:0 -- confirm
; against the DPP control encoding.
define amdgpu_kernel void @dpp_mul_row_share(ptr addrspace(1) %arg) {
|
||||
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%gep = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %id
|
||||
%load = load i32, ptr addrspace(1) %gep
|
||||
%tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %load, i32 %load, i32 336, i32 15, i32 15, i1 1)
|
||||
%mul = mul i32 %tmp0, %load
|
||||
store i32 %mul, ptr addrspace(1) %gep
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}dpp64_loop:
|
||||
; GCN: v_mov_b32_dpp
|
||||
; DPP64: v_mov_b32_dpp
|
||||
|
Loading…
x
Reference in New Issue
Block a user