[AMDGPU] Add suffix _d4 to tensor load/store with 4 groups D#, NFC (#184176)
Rename TENSOR_LOAD_TO_LDS to TENSOR_LOAD_TO_LDS_d4 and TENSOR_STORE_FROM_LDS to TENSOR_STORE_FROM_LDS_d4. The existing _D2 variants are lowercased to _d2 so that both suffixes follow the same convention. Also rename the functions in a couple of tests to reflect this change.
This commit is contained in:
parent
1953b87a31
commit
5b144c0aec
@ -8,12 +8,12 @@ typedef int v8i __attribute__((ext_vector_type(8)));
|
||||
static v4i v4i_zeros = (v4i){0,0,0,0};
|
||||
static v8i v8i_zeros = (v8i){0,0,0,0,0,0,0,0};
|
||||
|
||||
// CHECK-GFX1250-LABEL: @test_amdgcn_tensor_load_to_lds(
|
||||
// CHECK-GFX1250-LABEL: @test_amdgcn_tensor_load_to_lds_d4(
|
||||
// CHECK-GFX1250-NEXT: entry:
|
||||
// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.tensor.load.to.lds(<4 x i32> [[SG0:%.*]], <8 x i32> [[SG1:%.*]], <4 x i32> [[SG2:%.*]], <4 x i32> [[SG3:%.*]], <8 x i32> zeroinitializer, i32 0)
|
||||
// CHECK-GFX1250-NEXT: ret void
|
||||
//
|
||||
void test_amdgcn_tensor_load_to_lds(v4i sg0, v8i sg1, v4i sg2, v4i sg3)
|
||||
void test_amdgcn_tensor_load_to_lds_d4(v4i sg0, v8i sg1, v4i sg2, v4i sg3)
|
||||
{
|
||||
__builtin_amdgcn_tensor_load_to_lds(sg0, sg1, sg2, sg3, v8i_zeros, 0);
|
||||
}
|
||||
@ -28,12 +28,12 @@ void test_amdgcn_tensor_load_to_lds_d2(v4i sg0, v8i sg1)
|
||||
__builtin_amdgcn_tensor_load_to_lds(sg0, sg1, v4i_zeros, v4i_zeros, v8i_zeros, 27);
|
||||
}
|
||||
|
||||
// CHECK-GFX1250-LABEL: @test_amdgcn_tensor_store_from_lds(
|
||||
// CHECK-GFX1250-LABEL: @test_amdgcn_tensor_store_from_lds_d4(
|
||||
// CHECK-GFX1250-NEXT: entry:
|
||||
// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.tensor.store.from.lds(<4 x i32> [[SG0:%.*]], <8 x i32> [[SG1:%.*]], <4 x i32> [[SG2:%.*]], <4 x i32> [[SG3:%.*]], <8 x i32> zeroinitializer, i32 22)
|
||||
// CHECK-GFX1250-NEXT: ret void
|
||||
//
|
||||
void test_amdgcn_tensor_store_from_lds(v4i sg0, v8i sg1, v4i sg2, v4i sg3)
|
||||
void test_amdgcn_tensor_store_from_lds_d4(v4i sg0, v8i sg1, v4i sg2, v4i sg3)
|
||||
{
|
||||
__builtin_amdgcn_tensor_store_from_lds(sg0, sg1, sg2, sg3, v8i_zeros, 22);
|
||||
}
|
||||
|
||||
@ -3006,7 +3006,7 @@ void AMDGPUDAGToDAGISel::SelectDSBvhStackIntrinsic(SDNode *N, unsigned IntrID) {
|
||||
void AMDGPUDAGToDAGISel::SelectTensorLoadStore(SDNode *N, unsigned IntrID) {
|
||||
bool IsLoad = IntrID == Intrinsic::amdgcn_tensor_load_to_lds;
|
||||
unsigned Opc =
|
||||
IsLoad ? AMDGPU::TENSOR_LOAD_TO_LDS : AMDGPU::TENSOR_STORE_FROM_LDS;
|
||||
IsLoad ? AMDGPU::TENSOR_LOAD_TO_LDS_d4 : AMDGPU::TENSOR_STORE_FROM_LDS_d4;
|
||||
|
||||
SmallVector<SDValue, 7> TensorOps;
|
||||
// First two groups
|
||||
@ -3018,8 +3018,8 @@ void AMDGPUDAGToDAGISel::SelectTensorLoadStore(SDNode *N, unsigned IntrID) {
|
||||
SDValue Group3 = N->getOperand(5);
|
||||
if (ISD::isBuildVectorAllZeros(Group2.getNode()) &&
|
||||
ISD::isBuildVectorAllZeros(Group3.getNode())) {
|
||||
Opc = IsLoad ? AMDGPU::TENSOR_LOAD_TO_LDS_D2
|
||||
: AMDGPU::TENSOR_STORE_FROM_LDS_D2;
|
||||
Opc = IsLoad ? AMDGPU::TENSOR_LOAD_TO_LDS_d2
|
||||
: AMDGPU::TENSOR_STORE_FROM_LDS_d2;
|
||||
} else { // Has at least 4 groups
|
||||
TensorOps.push_back(Group2); // D# group 2
|
||||
TensorOps.push_back(Group3); // D# group 3
|
||||
|
||||
@ -3794,7 +3794,7 @@ bool AMDGPUInstructionSelector::selectTensorLoadStore(MachineInstr &MI,
|
||||
Intrinsic::ID IID) const {
|
||||
bool IsLoad = IID == Intrinsic::amdgcn_tensor_load_to_lds;
|
||||
unsigned Opc =
|
||||
IsLoad ? AMDGPU::TENSOR_LOAD_TO_LDS : AMDGPU::TENSOR_STORE_FROM_LDS;
|
||||
IsLoad ? AMDGPU::TENSOR_LOAD_TO_LDS_d4 : AMDGPU::TENSOR_STORE_FROM_LDS_d4;
|
||||
int NumGroups = 4;
|
||||
|
||||
// A lambda function to check whether an operand is a vector of all 0s.
|
||||
@ -3808,8 +3808,8 @@ bool AMDGPUInstructionSelector::selectTensorLoadStore(MachineInstr &MI,
|
||||
// Use the _d2 version if both group 2 and 3 are zero-initialized.
|
||||
if (isAllZeros(MI.getOperand(3)) && isAllZeros(MI.getOperand(4))) {
|
||||
NumGroups = 2;
|
||||
Opc = IsLoad ? AMDGPU::TENSOR_LOAD_TO_LDS_D2
|
||||
: AMDGPU::TENSOR_STORE_FROM_LDS_D2;
|
||||
Opc = IsLoad ? AMDGPU::TENSOR_LOAD_TO_LDS_d2
|
||||
: AMDGPU::TENSOR_STORE_FROM_LDS_d2;
|
||||
}
|
||||
|
||||
// TODO: Handle the fifth group: MI.getOperand(5), which is silently ignored
|
||||
|
||||
@ -2052,7 +2052,7 @@ def : MIMGG16Mapping<IMAGE_SAMPLE_C_CD_CL_O_nortn, IMAGE_SAMPLE_C_CD_CL_O_G16_no
|
||||
|
||||
class VIMAGE_TENSOR_Pseudo<string opName, bit _UpTo2D = 0> :
|
||||
InstSI<(outs ), (ins ), "", []>,
|
||||
SIMCInstr<opName#!if(_UpTo2D, "_D2", ""), SIEncodingFamily.NONE> {
|
||||
SIMCInstr<opName#!if(_UpTo2D, "_d2", "_d4"), SIEncodingFamily.NONE> {
|
||||
|
||||
let isPseudo = 1;
|
||||
let isCodeGenOnly = 1;
|
||||
@ -2077,10 +2077,10 @@ class VIMAGE_TENSOR_Pseudo<string opName, bit _UpTo2D = 0> :
|
||||
}
|
||||
|
||||
let SubtargetPredicate = isGFX125xOnly in {
|
||||
def TENSOR_LOAD_TO_LDS : VIMAGE_TENSOR_Pseudo<"tensor_load_to_lds">;
|
||||
def TENSOR_STORE_FROM_LDS : VIMAGE_TENSOR_Pseudo<"tensor_store_from_lds">;
|
||||
def TENSOR_LOAD_TO_LDS_D2 : VIMAGE_TENSOR_Pseudo<"tensor_load_to_lds", 1>;
|
||||
def TENSOR_STORE_FROM_LDS_D2 : VIMAGE_TENSOR_Pseudo<"tensor_store_from_lds", 1>;
|
||||
def TENSOR_LOAD_TO_LDS_d4 : VIMAGE_TENSOR_Pseudo<"tensor_load_to_lds">;
|
||||
def TENSOR_STORE_FROM_LDS_d4 : VIMAGE_TENSOR_Pseudo<"tensor_store_from_lds">;
|
||||
def TENSOR_LOAD_TO_LDS_d2 : VIMAGE_TENSOR_Pseudo<"tensor_load_to_lds", 1>;
|
||||
def TENSOR_STORE_FROM_LDS_d2 : VIMAGE_TENSOR_Pseudo<"tensor_store_from_lds", 1>;
|
||||
} // End SubtargetPredicate = isGFX125xOnly.
|
||||
|
||||
class VIMAGE_TENSOR_Real <bits<8> op, VIMAGE_TENSOR_Pseudo ps, string opName = ps.Mnemonic> :
|
||||
@ -2114,7 +2114,7 @@ class VIMAGE_TENSOR_Real <bits<8> op, VIMAGE_TENSOR_Pseudo ps, string opName = p
|
||||
|
||||
multiclass VIMAGE_TENSOR_Real_gfx1250<bits<8> op> {
|
||||
let AssemblerPredicate = isGFX125xOnly, DecoderNamespace = "GFX1250" in {
|
||||
foreach DSuffix = ["_D2", ""] in {
|
||||
foreach DSuffix = ["_d2", "_d4"] in {
|
||||
defvar ps = !cast<VIMAGE_TENSOR_Pseudo>(NAME # DSuffix);
|
||||
def DSuffix # _gfx1250 : VIMAGE_TENSOR_Real<op, ps, ps.Mnemonic>,
|
||||
SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX1250>;
|
||||
|
||||
@ -7530,12 +7530,12 @@ SIInstrInfo::legalizeOperands(MachineInstr &MI,
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Legalize TENSOR_LOAD_TO_LDS, TENSOR_LOAD_TO_LDS_D2, TENSOR_STORE_FROM_LDS,
|
||||
// TENSOR_STORE_FROM_LDS_D2. All their operands are scalar.
|
||||
if (MI.getOpcode() == AMDGPU::TENSOR_LOAD_TO_LDS ||
|
||||
MI.getOpcode() == AMDGPU::TENSOR_LOAD_TO_LDS_D2 ||
|
||||
MI.getOpcode() == AMDGPU::TENSOR_STORE_FROM_LDS ||
|
||||
MI.getOpcode() == AMDGPU::TENSOR_STORE_FROM_LDS_D2) {
|
||||
// Legalize TENSOR_LOAD_TO_LDS_d2/_d4, TENSOR_STORE_FROM_LDS_d2/_d4. All their
|
||||
// operands are scalar.
|
||||
if (MI.getOpcode() == AMDGPU::TENSOR_LOAD_TO_LDS_d2 ||
|
||||
MI.getOpcode() == AMDGPU::TENSOR_LOAD_TO_LDS_d4 ||
|
||||
MI.getOpcode() == AMDGPU::TENSOR_STORE_FROM_LDS_d2 ||
|
||||
MI.getOpcode() == AMDGPU::TENSOR_STORE_FROM_LDS_d4) {
|
||||
for (MachineOperand &Src : MI.explicit_operands()) {
|
||||
if (Src.isReg() && RI.hasVectorRegisters(MRI.getRegClass(Src.getReg())))
|
||||
Src.setReg(readlaneVGPRToSGPR(Src.getReg(), MI, MRI));
|
||||
|
||||
@ -827,8 +827,8 @@ public:
|
||||
unsigned Opc = MI.getOpcode();
|
||||
// Exclude instructions that read FROM LDS (not write to it)
|
||||
return isLDSDMA(MI) && Opc != AMDGPU::BUFFER_STORE_LDS_DWORD &&
|
||||
Opc != AMDGPU::TENSOR_STORE_FROM_LDS &&
|
||||
Opc != AMDGPU::TENSOR_STORE_FROM_LDS_D2;
|
||||
Opc != AMDGPU::TENSOR_STORE_FROM_LDS_d2 &&
|
||||
Opc != AMDGPU::TENSOR_STORE_FROM_LDS_d4;
|
||||
}
|
||||
|
||||
static bool isSBarrierSCCWrite(unsigned Opcode) {
|
||||
|
||||
@ -773,8 +773,8 @@ bool isAsyncStore(unsigned Opc) {
|
||||
}
|
||||
|
||||
bool isTensorStore(unsigned Opc) {
|
||||
return Opc == TENSOR_STORE_FROM_LDS_gfx1250 ||
|
||||
Opc == TENSOR_STORE_FROM_LDS_D2_gfx1250;
|
||||
return Opc == TENSOR_STORE_FROM_LDS_d2_gfx1250 ||
|
||||
Opc == TENSOR_STORE_FROM_LDS_d4_gfx1250;
|
||||
}
|
||||
|
||||
unsigned getTemporalHintType(const MCInstrDesc TID) {
|
||||
|
||||
@ -6,8 +6,8 @@
|
||||
declare void @llvm.amdgcn.tensor.load.to.lds(<4 x i32> %D0, <8 x i32> %D1, <4 x i32> %D2, <4 x i32> %D3, <8 x i32> %D4, i32 %cpol)
|
||||
declare void @llvm.amdgcn.tensor.store.from.lds(<4 x i32> %D0, <8 x i32> %D1, <4 x i32> %D2, <4 x i32> %D3, <8 x i32> %D4, i32 %cpol)
|
||||
|
||||
define amdgpu_ps void @tensor_load_to_lds(<4 x i32> inreg %D0, <8 x i32> inreg %D1, <4 x i32> inreg %D2, <4 x i32> inreg %D3) {
|
||||
; GFX1250-LABEL: tensor_load_to_lds:
|
||||
define amdgpu_ps void @tensor_load_to_lds_d4(<4 x i32> inreg %D0, <8 x i32> inreg %D1, <4 x i32> inreg %D2, <4 x i32> inreg %D3) {
|
||||
; GFX1250-LABEL: tensor_load_to_lds_d4:
|
||||
; GFX1250: ; %bb.0:
|
||||
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-NEXT: tensor_load_to_lds s[0:3], s[4:11], s[12:15], s[16:19]
|
||||
@ -16,8 +16,8 @@ define amdgpu_ps void @tensor_load_to_lds(<4 x i32> inreg %D0, <8 x i32> inreg %
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_ps void @tensor_load_to_lds_vector(<4 x i32> %D0, <8 x i32> %D1, <4 x i32> %D2, <4 x i32> %D3) {
|
||||
; GFX1250-SDAG-LABEL: tensor_load_to_lds_vector:
|
||||
define amdgpu_ps void @tensor_load_to_lds_d4_vector(<4 x i32> %D0, <8 x i32> %D1, <4 x i32> %D2, <4 x i32> %D3) {
|
||||
; GFX1250-SDAG-LABEL: tensor_load_to_lds_d4_vector:
|
||||
; GFX1250-SDAG: ; %bb.0:
|
||||
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-SDAG-NEXT: v_readfirstlane_b32 s0, v4
|
||||
@ -44,7 +44,7 @@ define amdgpu_ps void @tensor_load_to_lds_vector(<4 x i32> %D0, <8 x i32> %D1, <
|
||||
; GFX1250-SDAG-NEXT: tensor_load_to_lds s[8:11], s[0:7], s[12:15], s[16:19]
|
||||
; GFX1250-SDAG-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-GISEL-LABEL: tensor_load_to_lds_vector:
|
||||
; GFX1250-GISEL-LABEL: tensor_load_to_lds_d4_vector:
|
||||
; GFX1250-GISEL: ; %bb.0:
|
||||
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-GISEL-NEXT: v_readfirstlane_b32 s8, v0
|
||||
@ -126,8 +126,8 @@ define amdgpu_ps void @tensor_load_to_lds_d2_vector(<4 x i32> %D0, <8 x i32> %D1
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_ps void @tensor_store_from_lds(<4 x i32> inreg %D0, <8 x i32> inreg %D1, <4 x i32> inreg %D2, <4 x i32> inreg %D3) {
|
||||
; GFX1250-LABEL: tensor_store_from_lds:
|
||||
define amdgpu_ps void @tensor_store_from_lds_d4(<4 x i32> inreg %D0, <8 x i32> inreg %D1, <4 x i32> inreg %D2, <4 x i32> inreg %D3) {
|
||||
; GFX1250-LABEL: tensor_store_from_lds_d4:
|
||||
; GFX1250: ; %bb.0:
|
||||
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-NEXT: tensor_store_from_lds s[0:3], s[4:11], s[12:15], s[16:19] th:TH_STORE_NT_HT scope:SCOPE_DEV
|
||||
@ -136,8 +136,8 @@ define amdgpu_ps void @tensor_store_from_lds(<4 x i32> inreg %D0, <8 x i32> inre
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_ps void @tensor_store_from_lds_vector(<4 x i32> %D0, <8 x i32> %D1, <4 x i32> %D2, <4 x i32> %D3) {
|
||||
; GFX1250-SDAG-LABEL: tensor_store_from_lds_vector:
|
||||
define amdgpu_ps void @tensor_store_from_lds_d4_vector(<4 x i32> %D0, <8 x i32> %D1, <4 x i32> %D2, <4 x i32> %D3) {
|
||||
; GFX1250-SDAG-LABEL: tensor_store_from_lds_d4_vector:
|
||||
; GFX1250-SDAG: ; %bb.0:
|
||||
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-SDAG-NEXT: v_readfirstlane_b32 s0, v4
|
||||
@ -164,7 +164,7 @@ define amdgpu_ps void @tensor_store_from_lds_vector(<4 x i32> %D0, <8 x i32> %D1
|
||||
; GFX1250-SDAG-NEXT: tensor_store_from_lds s[8:11], s[0:7], s[12:15], s[16:19] th:TH_STORE_NT_HT scope:SCOPE_DEV
|
||||
; GFX1250-SDAG-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-GISEL-LABEL: tensor_store_from_lds_vector:
|
||||
; GFX1250-GISEL-LABEL: tensor_store_from_lds_d4_vector:
|
||||
; GFX1250-GISEL: ; %bb.0:
|
||||
; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
|
||||
; GFX1250-GISEL-NEXT: v_readfirstlane_b32 s8, v0
|
||||
|
||||
@ -18,7 +18,7 @@ body: |
|
||||
; CHECK-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub0:sgpr_128 = S_MOV_B32 1
|
||||
; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]].sub2:sgpr_128 = S_MOV_B32 0
|
||||
; CHECK-NEXT: undef [[S_MOV_B32_1:%[0-9]+]].sub0:sgpr_256 = S_MOV_B32 0
|
||||
; CHECK-NEXT: TENSOR_LOAD_TO_LDS_D2 [[S_MOV_B32_]], [[S_MOV_B32_1]], 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
|
||||
; CHECK-NEXT: TENSOR_LOAD_TO_LDS_d2 [[S_MOV_B32_]], [[S_MOV_B32_1]], 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
|
||||
; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]].sub0:sgpr_128 = S_MOV_B32 1
|
||||
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]].sub1:sgpr_128 = COPY [[S_MOV_B32_]].sub0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
@ -27,8 +27,8 @@ body: |
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]].sub2:sgpr_128 = COPY [[S_MOV_B32_]].sub0
|
||||
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]].sub3:sgpr_128 = COPY [[S_MOV_B32_]].sub0
|
||||
; CHECK-NEXT: TENSOR_LOAD_TO_LDS_D2 [[S_MOV_B32_]], [[S_MOV_B32_1]], 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
|
||||
; CHECK-NEXT: TENSOR_LOAD_TO_LDS_D2 [[S_LOAD_DWORD_IMM]], [[S_MOV_B32_1]], 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
|
||||
; CHECK-NEXT: TENSOR_LOAD_TO_LDS_d2 [[S_MOV_B32_]], [[S_MOV_B32_1]], 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
|
||||
; CHECK-NEXT: TENSOR_LOAD_TO_LDS_d2 [[S_LOAD_DWORD_IMM]], [[S_MOV_B32_1]], 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
|
||||
; CHECK-NEXT: $vcc_lo = COPY $exec_lo
|
||||
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]].sub1:sgpr_128 = S_MOV_B32 0
|
||||
; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]].sub2:sgpr_128 = S_MOV_B32 1
|
||||
@ -47,7 +47,7 @@ body: |
|
||||
undef %3.sub0:sgpr_128 = COPY %2
|
||||
%4:sreg_32 = S_MOV_B32 0
|
||||
undef %5.sub0:sgpr_256 = COPY %4
|
||||
TENSOR_LOAD_TO_LDS_D2 %3, %5, 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
|
||||
TENSOR_LOAD_TO_LDS_d2 %3, %5, 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
|
||||
%6:sgpr_128 = COPY killed %3
|
||||
%6.sub1:sgpr_128 = COPY killed %1
|
||||
%7:sreg_32 = COPY $exec_lo
|
||||
@ -62,11 +62,11 @@ body: |
|
||||
%11.sub1:sgpr_128 = COPY killed %10
|
||||
%11.sub2:sgpr_128 = COPY %2
|
||||
%11.sub3:sgpr_128 = COPY %2
|
||||
TENSOR_LOAD_TO_LDS_D2 killed %11, %5, 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
|
||||
TENSOR_LOAD_TO_LDS_d2 killed %11, %5, 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
|
||||
%12:sreg_32 = COPY killed %9
|
||||
%13:sgpr_128 = COPY %6
|
||||
%13.sub2:sgpr_128 = COPY killed %12
|
||||
TENSOR_LOAD_TO_LDS_D2 killed %13, %5, 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
|
||||
TENSOR_LOAD_TO_LDS_d2 killed %13, %5, 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
|
||||
$vcc_lo = COPY %7
|
||||
%8:sreg_32 = COPY %4
|
||||
%9:sreg_32 = COPY %2
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user