AMDGPU: Handle gfx950 96/128-bit buffer_load_lds (#116681)
Enforcing this limit in the clang builtin will come later.
This commit is contained in:
parent
50224bd5ba
commit
927032807d
@ -1674,7 +1674,7 @@ class AMDGPURawBufferLoadLDS : Intrinsic <
|
||||
[],
|
||||
[llvm_v4i32_ty, // rsrc(SGPR)
|
||||
LLVMQualPointerType<3>, // LDS base offset
|
||||
llvm_i32_ty, // Data byte size: 1/2/4
|
||||
llvm_i32_ty, // Data byte size: 1/2/4 (/12/16 for gfx950)
|
||||
llvm_i32_ty, // voffset(VGPR, included in bounds checking and swizzling)
|
||||
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
|
||||
llvm_i32_ty, // imm offset(imm, included in bounds checking and swizzling)
|
||||
@ -1693,7 +1693,7 @@ class AMDGPURawPtrBufferLoadLDS : Intrinsic <
|
||||
[],
|
||||
[AMDGPUBufferRsrcTy, // rsrc(SGPR)
|
||||
LLVMQualPointerType<3>, // LDS base offset
|
||||
llvm_i32_ty, // Data byte size: 1/2/4
|
||||
llvm_i32_ty, // Data byte size: 1/2/4 (/12/16 for gfx950)
|
||||
llvm_i32_ty, // voffset(VGPR, included in bounds checking and swizzling)
|
||||
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
|
||||
llvm_i32_ty, // imm offset(imm, included in bounds checking and swizzling)
|
||||
@ -1715,7 +1715,7 @@ class AMDGPUStructBufferLoadLDS : Intrinsic <
|
||||
[],
|
||||
[llvm_v4i32_ty, // rsrc(SGPR)
|
||||
LLVMQualPointerType<3>, // LDS base offset
|
||||
llvm_i32_ty, // Data byte size: 1/2/4
|
||||
llvm_i32_ty, // Data byte size: 1/2/4 (/12/16 for gfx950)
|
||||
llvm_i32_ty, // vindex(VGPR)
|
||||
llvm_i32_ty, // voffset(VGPR, included in bounds checking and swizzling)
|
||||
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
|
||||
@ -1735,7 +1735,7 @@ class AMDGPUStructPtrBufferLoadLDS : Intrinsic <
|
||||
[],
|
||||
[AMDGPUBufferRsrcTy, // rsrc(SGPR)
|
||||
LLVMQualPointerType<3>, // LDS base offset
|
||||
llvm_i32_ty, // Data byte size: 1/2/4
|
||||
llvm_i32_ty, // Data byte size: 1/2/4 (/12/16 for gfx950)
|
||||
llvm_i32_ty, // vindex(VGPR)
|
||||
llvm_i32_ty, // voffset(VGPR, included in bounds checking and swizzling)
|
||||
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
|
||||
|
||||
@ -3240,6 +3240,24 @@ bool AMDGPUInstructionSelector::selectBufferLoadLds(MachineInstr &MI) const {
|
||||
: HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFEN
|
||||
: AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFSET;
|
||||
break;
|
||||
case 12:
|
||||
if (!Subtarget->hasLDSLoadB96_B128())
|
||||
return false;
|
||||
|
||||
Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_BOTHEN
|
||||
: AMDGPU::BUFFER_LOAD_DWORDX3_LDS_IDXEN
|
||||
: HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFEN
|
||||
: AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFSET;
|
||||
break;
|
||||
case 16:
|
||||
if (!Subtarget->hasLDSLoadB96_B128())
|
||||
return false;
|
||||
|
||||
Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_BOTHEN
|
||||
: AMDGPU::BUFFER_LOAD_DWORDX4_LDS_IDXEN
|
||||
: HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFEN
|
||||
: AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFSET;
|
||||
break;
|
||||
}
|
||||
|
||||
MachineBasicBlock *MBB = MI.getParent();
|
||||
|
||||
@ -573,9 +573,17 @@ multiclass MUBUF_Pseudo_Loads<string opName, ValueType load_vt = i32,
|
||||
}
|
||||
}
|
||||
|
||||
multiclass MUBUF_Pseudo_Loads_Lds<string opName, ValueType load_vt = i32> {
|
||||
multiclass MUBUF_Pseudo_Loads_Lds<string opName, ValueType load_vt = i32, Predicate LDSPred = TruePredicate> {
|
||||
defm NAME : MUBUF_Pseudo_Loads<opName, load_vt>;
|
||||
defm _LDS : MUBUF_Pseudo_Loads<opName, load_vt, 0, 1>;
|
||||
|
||||
if !ne(LDSPred, TruePredicate) then {
|
||||
let SubtargetPredicate = LDSPred in {
|
||||
defm _LDS : MUBUF_Pseudo_Loads<opName, load_vt, 0, 1>;
|
||||
}
|
||||
} else {
|
||||
defm _LDS : MUBUF_Pseudo_Loads<opName, load_vt, 0, 1>;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
multiclass MUBUF_Pseudo_Loads_LDSOpc<string opName,
|
||||
@ -956,11 +964,11 @@ defm BUFFER_LOAD_DWORD : MUBUF_Pseudo_Loads_Lds <
|
||||
defm BUFFER_LOAD_DWORDX2 : MUBUF_Pseudo_Loads <
|
||||
"buffer_load_dwordx2", v2i32
|
||||
>;
|
||||
defm BUFFER_LOAD_DWORDX3 : MUBUF_Pseudo_Loads <
|
||||
"buffer_load_dwordx3", v3i32
|
||||
defm BUFFER_LOAD_DWORDX3 : MUBUF_Pseudo_Loads_Lds <
|
||||
"buffer_load_dwordx3", v3i32, /*LDSPred=*/HasGFX950Insts
|
||||
>;
|
||||
defm BUFFER_LOAD_DWORDX4 : MUBUF_Pseudo_Loads <
|
||||
"buffer_load_dwordx4", v4i32
|
||||
defm BUFFER_LOAD_DWORDX4 : MUBUF_Pseudo_Loads_Lds <
|
||||
"buffer_load_dwordx4", v4i32, /*LDSPred=*/HasGFX950Insts
|
||||
>;
|
||||
|
||||
defm BUFFER_LOAD_LDS_B32 : MUBUF_Pseudo_Loads_LDSOpc <
|
||||
@ -3231,8 +3239,8 @@ defm BUFFER_LOAD_USHORT : MUBUF_Real_AllAddr_Lds_vi <0x12>;
|
||||
defm BUFFER_LOAD_SSHORT : MUBUF_Real_AllAddr_Lds_vi <0x13>;
|
||||
defm BUFFER_LOAD_DWORD : MUBUF_Real_AllAddr_Lds_vi <0x14>;
|
||||
defm BUFFER_LOAD_DWORDX2 : MUBUF_Real_AllAddr_vi <0x15>;
|
||||
defm BUFFER_LOAD_DWORDX3 : MUBUF_Real_AllAddr_vi <0x16>;
|
||||
defm BUFFER_LOAD_DWORDX4 : MUBUF_Real_AllAddr_vi <0x17>;
|
||||
defm BUFFER_LOAD_DWORDX3 : MUBUF_Real_AllAddr_Lds_vi <0x16>;
|
||||
defm BUFFER_LOAD_DWORDX4 : MUBUF_Real_AllAddr_Lds_vi <0x17>;
|
||||
defm BUFFER_STORE_BYTE : MUBUF_Real_AllAddr_vi <0x18>;
|
||||
defm BUFFER_STORE_BYTE_D16_HI : MUBUF_Real_AllAddr_vi <0x19>;
|
||||
defm BUFFER_STORE_SHORT : MUBUF_Real_AllAddr_vi <0x1a>;
|
||||
|
||||
@ -9825,6 +9825,22 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
|
||||
: HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFEN
|
||||
: AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFSET;
|
||||
break;
|
||||
case 12:
|
||||
if (!Subtarget->hasLDSLoadB96_B128())
|
||||
return SDValue();
|
||||
Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_BOTHEN
|
||||
: AMDGPU::BUFFER_LOAD_DWORDX3_LDS_IDXEN
|
||||
: HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFEN
|
||||
: AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFSET;
|
||||
break;
|
||||
case 16:
|
||||
if (!Subtarget->hasLDSLoadB96_B128())
|
||||
return SDValue();
|
||||
Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_BOTHEN
|
||||
: AMDGPU::BUFFER_LOAD_DWORDX4_LDS_IDXEN
|
||||
: HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFEN
|
||||
: AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFSET;
|
||||
break;
|
||||
}
|
||||
|
||||
SDValue M0Val = copyToM0(DAG, Chain, DL, Op.getOperand(3));
|
||||
|
||||
@ -2,6 +2,14 @@
|
||||
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX950,GFX950-SDAG %s
|
||||
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX950,GFX950-GISEL %s
|
||||
|
||||
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx940 -filetype=null < %s 2>&1 | FileCheck -check-prefix=ERR-SDAG %s
|
||||
; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx940 -filetype=null < %s 2>&1 | FileCheck -check-prefix=ERR-GISEL %s
|
||||
|
||||
; ERR-SDAG: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.global.load.lds
|
||||
|
||||
; ERR-GISEL: LLVM ERROR: cannot select: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.global.load.lds),
|
||||
|
||||
|
||||
declare void @llvm.amdgcn.global.load.lds(ptr addrspace(1) nocapture %gptr, ptr addrspace(3) nocapture %lptr, i32 %size, i32 %offset, i32 %aux)
|
||||
|
||||
;---------------------------------------------------------------------
|
||||
|
||||
@ -0,0 +1,176 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX950,GFX950-SDAG %s
|
||||
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX950,GFX950-GISEL %s
|
||||
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx940 -filetype=null < %s 2>&1 | FileCheck -check-prefix=ERR-SDAG %s
|
||||
; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx940 -filetype=null < %s 2>&1 | FileCheck -check-prefix=ERR-GISEL %s
|
||||
|
||||
; FIXME: Not a great error
|
||||
; ERR-SDAG: LLVM ERROR: Do not know how to expand this operator's operand!
|
||||
; ERR-GISEL: LLVM ERROR: cannot select: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.raw.ptr.buffer.load.lds),
|
||||
|
||||
declare void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) nocapture, i32 %size, i32 %voffset, i32 %soffset, i32 %offset, i32 %aux)
|
||||
|
||||
;---------------------------------------------------------------------
|
||||
; dwordx3
|
||||
;---------------------------------------------------------------------
|
||||
|
||||
; Three back-to-back 96-bit raw buffer-to-LDS loads that use neither voffset
; nor soffset, with immediate offsets 0/4/8 and aux values 0/1/2, followed by
; a read of the LDS destination. The size operand is 12 so that this case
; really selects buffer_load_dwordx3; it previously passed 4 and therefore
; only exercised buffer_load_dword.
; NOTE(review): the assertions below were updated by hand to match the new
; size operand; regenerate them with utils/update_llc_test_checks.py.
define amdgpu_ps float @buffer_load_lds_dwordx3(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds) {
; GFX950-LABEL: buffer_load_lds_dwordx3:
; GFX950:       ; %bb.0: ; %main_body
; GFX950-NEXT:    s_mov_b32 m0, s4
; GFX950-NEXT:    s_nop 0
; GFX950-NEXT:    buffer_load_dwordx3 off, s[0:3], 0 lds
; GFX950-NEXT:    buffer_load_dwordx3 off, s[0:3], 0 offset:4 sc0 lds
; GFX950-NEXT:    buffer_load_dwordx3 off, s[0:3], 0 offset:8 nt lds
; GFX950-NEXT:    v_mov_b32_e32 v0, s4
; GFX950-NEXT:    s_waitcnt vmcnt(0)
; GFX950-NEXT:    ds_read_b32 v0, v0
; GFX950-NEXT:    s_waitcnt lgkmcnt(0)
; GFX950-NEXT:    ; return to shader part epilog
main_body:
  call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 12, i32 0, i32 0, i32 0, i32 0)
  call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 12, i32 0, i32 0, i32 4, i32 1)
  call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 12, i32 0, i32 0, i32 8, i32 2)
  %res = load float, ptr addrspace(3) %lds
  ret float %res
}
|
||||
|
||||
define amdgpu_ps void @buffer_load_lds_dwordx3_imm_voffset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds) {
|
||||
; GFX950-LABEL: buffer_load_lds_dwordx3_imm_voffset:
|
||||
; GFX950: ; %bb.0:
|
||||
; GFX950-NEXT: v_mov_b32_e32 v0, 0x800
|
||||
; GFX950-NEXT: s_mov_b32 m0, s4
|
||||
; GFX950-NEXT: s_nop 0
|
||||
; GFX950-NEXT: buffer_load_dwordx3 v0, s[0:3], 0 offen lds
|
||||
; GFX950-NEXT: s_endpgm
|
||||
call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 12, i32 2048, i32 0, i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_ps void @buffer_load_lds_dwordx3_v_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %voffset) {
|
||||
; GFX950-LABEL: buffer_load_lds_dwordx3_v_offset:
|
||||
; GFX950: ; %bb.0:
|
||||
; GFX950-NEXT: s_mov_b32 m0, s4
|
||||
; GFX950-NEXT: s_nop 0
|
||||
; GFX950-NEXT: buffer_load_dwordx3 v0, s[0:3], 0 offen lds
|
||||
; GFX950-NEXT: s_endpgm
|
||||
call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 12, i32 %voffset, i32 0, i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_ps void @buffer_load_lds_dwordx3_s_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 inreg %soffset) {
|
||||
; GFX950-LABEL: buffer_load_lds_dwordx3_s_offset:
|
||||
; GFX950: ; %bb.0:
|
||||
; GFX950-NEXT: s_mov_b32 m0, s4
|
||||
; GFX950-NEXT: s_nop 0
|
||||
; GFX950-NEXT: buffer_load_dwordx3 off, s[0:3], s5 lds
|
||||
; GFX950-NEXT: s_endpgm
|
||||
call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 12, i32 0, i32 %soffset, i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_ps void @buffer_load_lds_dwordx3_vs_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %voffset, i32 inreg %soffset) {
|
||||
; GFX950-LABEL: buffer_load_lds_dwordx3_vs_offset:
|
||||
; GFX950: ; %bb.0:
|
||||
; GFX950-NEXT: s_mov_b32 m0, s4
|
||||
; GFX950-NEXT: s_nop 0
|
||||
; GFX950-NEXT: buffer_load_dwordx3 v0, s[0:3], s5 offen lds
|
||||
; GFX950-NEXT: s_endpgm
|
||||
call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 12, i32 %voffset, i32 %soffset, i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_ps void @buffer_load_lds_dwordx3_vs_imm_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %voffset, i32 inreg %soffset) {
|
||||
; GFX950-LABEL: buffer_load_lds_dwordx3_vs_imm_offset:
|
||||
; GFX950: ; %bb.0:
|
||||
; GFX950-NEXT: s_mov_b32 m0, s4
|
||||
; GFX950-NEXT: s_nop 0
|
||||
; GFX950-NEXT: buffer_load_dwordx3 v0, s[0:3], s5 offen offset:2048 lds
|
||||
; GFX950-NEXT: s_endpgm
|
||||
call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 12, i32 %voffset, i32 %soffset, i32 2048, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
;---------------------------------------------------------------------
|
||||
; dwordx4
|
||||
;---------------------------------------------------------------------
|
||||
|
||||
; Three back-to-back 128-bit raw buffer-to-LDS loads that use neither voffset
; nor soffset, with immediate offsets 0/4/8 and aux values 0/1/2, followed by
; a read of the LDS destination. The size operand is 16 so that this case
; really selects buffer_load_dwordx4; it previously passed 4 and therefore
; only exercised buffer_load_dword.
; NOTE(review): the assertions below were updated by hand to match the new
; size operand; regenerate them with utils/update_llc_test_checks.py.
define amdgpu_ps float @buffer_load_lds_dwordx4(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds) {
; GFX950-LABEL: buffer_load_lds_dwordx4:
; GFX950:       ; %bb.0: ; %main_body
; GFX950-NEXT:    s_mov_b32 m0, s4
; GFX950-NEXT:    s_nop 0
; GFX950-NEXT:    buffer_load_dwordx4 off, s[0:3], 0 lds
; GFX950-NEXT:    buffer_load_dwordx4 off, s[0:3], 0 offset:4 sc0 lds
; GFX950-NEXT:    buffer_load_dwordx4 off, s[0:3], 0 offset:8 nt lds
; GFX950-NEXT:    v_mov_b32_e32 v0, s4
; GFX950-NEXT:    s_waitcnt vmcnt(0)
; GFX950-NEXT:    ds_read_b32 v0, v0
; GFX950-NEXT:    s_waitcnt lgkmcnt(0)
; GFX950-NEXT:    ; return to shader part epilog
main_body:
  call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 16, i32 0, i32 0, i32 0, i32 0)
  call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 16, i32 0, i32 0, i32 4, i32 1)
  call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 16, i32 0, i32 0, i32 8, i32 2)
  %res = load float, ptr addrspace(3) %lds
  ret float %res
}
|
||||
|
||||
define amdgpu_ps void @buffer_load_lds_dwordx4_imm_voffset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds) {
|
||||
; GFX950-LABEL: buffer_load_lds_dwordx4_imm_voffset:
|
||||
; GFX950: ; %bb.0:
|
||||
; GFX950-NEXT: v_mov_b32_e32 v0, 0x800
|
||||
; GFX950-NEXT: s_mov_b32 m0, s4
|
||||
; GFX950-NEXT: s_nop 0
|
||||
; GFX950-NEXT: buffer_load_dwordx4 v0, s[0:3], 0 offen lds
|
||||
; GFX950-NEXT: s_endpgm
|
||||
call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 16, i32 2048, i32 0, i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_ps void @buffer_load_lds_dwordx4_v_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %voffset) {
|
||||
; GFX950-LABEL: buffer_load_lds_dwordx4_v_offset:
|
||||
; GFX950: ; %bb.0:
|
||||
; GFX950-NEXT: s_mov_b32 m0, s4
|
||||
; GFX950-NEXT: s_nop 0
|
||||
; GFX950-NEXT: buffer_load_dwordx4 v0, s[0:3], 0 offen lds
|
||||
; GFX950-NEXT: s_endpgm
|
||||
call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 16, i32 %voffset, i32 0, i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_ps void @buffer_load_lds_dwordx4_s_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 inreg %soffset) {
|
||||
; GFX950-LABEL: buffer_load_lds_dwordx4_s_offset:
|
||||
; GFX950: ; %bb.0:
|
||||
; GFX950-NEXT: s_mov_b32 m0, s4
|
||||
; GFX950-NEXT: s_nop 0
|
||||
; GFX950-NEXT: buffer_load_dwordx4 off, s[0:3], s5 lds
|
||||
; GFX950-NEXT: s_endpgm
|
||||
call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 16, i32 0, i32 %soffset, i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_ps void @buffer_load_lds_dwordx4_vs_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %voffset, i32 inreg %soffset) {
|
||||
; GFX950-LABEL: buffer_load_lds_dwordx4_vs_offset:
|
||||
; GFX950: ; %bb.0:
|
||||
; GFX950-NEXT: s_mov_b32 m0, s4
|
||||
; GFX950-NEXT: s_nop 0
|
||||
; GFX950-NEXT: buffer_load_dwordx4 v0, s[0:3], s5 offen lds
|
||||
; GFX950-NEXT: s_endpgm
|
||||
call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 16, i32 %voffset, i32 %soffset, i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_ps void @buffer_load_lds_dwordx4_vs_imm_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %voffset, i32 inreg %soffset) {
|
||||
; GFX950-LABEL: buffer_load_lds_dwordx4_vs_imm_offset:
|
||||
; GFX950: ; %bb.0:
|
||||
; GFX950-NEXT: s_mov_b32 m0, s4
|
||||
; GFX950-NEXT: s_nop 0
|
||||
; GFX950-NEXT: buffer_load_dwordx4 v0, s[0:3], s5 offen offset:2048 lds
|
||||
; GFX950-NEXT: s_endpgm
|
||||
call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 16, i32 %voffset, i32 %soffset, i32 2048, i32 0)
|
||||
ret void
|
||||
}
|
||||
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
|
||||
; GFX950-GISEL: {{.*}}
|
||||
; GFX950-SDAG: {{.*}}
|
||||
@ -0,0 +1,196 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
|
||||
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX950,GFX950-SDAG %s
|
||||
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX950,GFX950-GISEL %s
|
||||
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx940 -filetype=null < %s 2>&1 | FileCheck -check-prefix=ERR-SDAG %s
|
||||
; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx940 -filetype=null < %s 2>&1 | FileCheck -check-prefix=ERR-GISEL %s
|
||||
|
||||
; ERR-SDAG: LLVM ERROR: Do not know how to expand this operator's operand!
|
||||
; ERR-GISEL: LLVM ERROR: cannot select: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.ptr.buffer.load.lds),
|
||||
|
||||
declare void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) nocapture, i32 %size, i32 %vindex, i32 %voffset, i32 %soffset, i32 %offset, i32 %aux)
|
||||
|
||||
;---------------------------------------------------------------------
|
||||
; dwordx3
|
||||
;---------------------------------------------------------------------
|
||||
|
||||
define amdgpu_ps float @buffer_load_lds_dwordx3(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds) {
|
||||
; GFX950-SDAG-LABEL: buffer_load_lds_dwordx3:
|
||||
; GFX950-SDAG: ; %bb.0:
|
||||
; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, 8
|
||||
; GFX950-SDAG-NEXT: s_mov_b32 m0, s4
|
||||
; GFX950-SDAG-NEXT: s_nop 0
|
||||
; GFX950-SDAG-NEXT: buffer_load_dwordx3 v0, s[0:3], 0 idxen lds
|
||||
; GFX950-SDAG-NEXT: buffer_load_dwordx3 v0, s[0:3], 0 idxen offset:4 sc0 lds
|
||||
; GFX950-SDAG-NEXT: buffer_load_dwordx3 v0, s[0:3], 0 idxen offset:8 nt lds
|
||||
; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: ds_read_b32 v0, v0
|
||||
; GFX950-SDAG-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX950-GISEL-LABEL: buffer_load_lds_dwordx3:
|
||||
; GFX950-GISEL: ; %bb.0:
|
||||
; GFX950-GISEL-NEXT: s_mov_b32 m0, s4
|
||||
; GFX950-GISEL-NEXT: v_mov_b32_e32 v0, 8
|
||||
; GFX950-GISEL-NEXT: buffer_load_dwordx3 v0, s[0:3], 0 idxen lds
|
||||
; GFX950-GISEL-NEXT: buffer_load_dwordx3 v0, s[0:3], 0 idxen offset:4 sc0 lds
|
||||
; GFX950-GISEL-NEXT: buffer_load_dwordx3 v0, s[0:3], 0 idxen offset:8 nt lds
|
||||
; GFX950-GISEL-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: ds_read_b32 v0, v0
|
||||
; GFX950-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: ; return to shader part epilog
|
||||
call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 12, i32 8, i32 0, i32 0, i32 0, i32 0)
|
||||
call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 12, i32 8, i32 0, i32 0, i32 4, i32 1)
|
||||
call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 12, i32 8, i32 0, i32 0, i32 8, i32 2)
|
||||
%res = load float, ptr addrspace(3) %lds
|
||||
ret float %res
|
||||
}
|
||||
|
||||
define amdgpu_ps void @buffer_load_lds_dwordx3_imm_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %vindex) {
|
||||
; GFX950-LABEL: buffer_load_lds_dwordx3_imm_offset:
|
||||
; GFX950: ; %bb.0:
|
||||
; GFX950-NEXT: s_mov_b32 m0, s4
|
||||
; GFX950-NEXT: s_nop 0
|
||||
; GFX950-NEXT: buffer_load_dwordx3 v0, s[0:3], 0 idxen offset:2048 lds
|
||||
; GFX950-NEXT: s_endpgm
|
||||
call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 12, i32 %vindex, i32 0, i32 0, i32 2048, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_ps void @buffer_load_lds_dwordx3_v_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %vindex, i32 %voffset) {
|
||||
; GFX950-LABEL: buffer_load_lds_dwordx3_v_offset:
|
||||
; GFX950: ; %bb.0:
|
||||
; GFX950-NEXT: s_mov_b32 m0, s4
|
||||
; GFX950-NEXT: s_nop 0
|
||||
; GFX950-NEXT: buffer_load_dwordx3 v[0:1], s[0:3], 0 idxen offen lds
|
||||
; GFX950-NEXT: s_endpgm
|
||||
call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 12, i32 %vindex, i32 %voffset, i32 0, i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_ps void @buffer_load_lds_dwordx3_s_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %vindex, i32 inreg %soffset) {
|
||||
; GFX950-LABEL: buffer_load_lds_dwordx3_s_offset:
|
||||
; GFX950: ; %bb.0:
|
||||
; GFX950-NEXT: s_mov_b32 m0, s4
|
||||
; GFX950-NEXT: s_nop 0
|
||||
; GFX950-NEXT: buffer_load_dwordx3 v0, s[0:3], s5 idxen lds
|
||||
; GFX950-NEXT: s_endpgm
|
||||
call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 12, i32 %vindex, i32 0, i32 %soffset, i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_ps void @buffer_load_lds_dwordx3_vs_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
|
||||
; GFX950-LABEL: buffer_load_lds_dwordx3_vs_offset:
|
||||
; GFX950: ; %bb.0:
|
||||
; GFX950-NEXT: s_mov_b32 m0, s4
|
||||
; GFX950-NEXT: s_nop 0
|
||||
; GFX950-NEXT: buffer_load_dwordx3 v[0:1], s[0:3], s5 idxen offen lds
|
||||
; GFX950-NEXT: s_endpgm
|
||||
call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 12, i32 %vindex, i32 %voffset, i32 %soffset, i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_ps void @buffer_load_lds_dwordx3_vs_imm_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
|
||||
; GFX950-LABEL: buffer_load_lds_dwordx3_vs_imm_offset:
|
||||
; GFX950: ; %bb.0:
|
||||
; GFX950-NEXT: s_mov_b32 m0, s4
|
||||
; GFX950-NEXT: s_nop 0
|
||||
; GFX950-NEXT: buffer_load_dwordx3 v[0:1], s[0:3], s5 idxen offen offset:2048 lds
|
||||
; GFX950-NEXT: s_endpgm
|
||||
call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 12, i32 %vindex, i32 %voffset, i32 %soffset, i32 2048, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
;---------------------------------------------------------------------
|
||||
; dwordx4
|
||||
;---------------------------------------------------------------------
|
||||
|
||||
define amdgpu_ps float @buffer_load_lds_dwordx4(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds) {
|
||||
; GFX950-SDAG-LABEL: buffer_load_lds_dwordx4:
|
||||
; GFX950-SDAG: ; %bb.0:
|
||||
; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, 8
|
||||
; GFX950-SDAG-NEXT: s_mov_b32 m0, s4
|
||||
; GFX950-SDAG-NEXT: s_nop 0
|
||||
; GFX950-SDAG-NEXT: buffer_load_dwordx4 v0, s[0:3], 0 idxen lds
|
||||
; GFX950-SDAG-NEXT: buffer_load_dwordx4 v0, s[0:3], 0 idxen offset:4 sc0 lds
|
||||
; GFX950-SDAG-NEXT: buffer_load_dwordx4 v0, s[0:3], 0 idxen offset:8 nt lds
|
||||
; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-SDAG-NEXT: ds_read_b32 v0, v0
|
||||
; GFX950-SDAG-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX950-SDAG-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX950-GISEL-LABEL: buffer_load_lds_dwordx4:
|
||||
; GFX950-GISEL: ; %bb.0:
|
||||
; GFX950-GISEL-NEXT: s_mov_b32 m0, s4
|
||||
; GFX950-GISEL-NEXT: v_mov_b32_e32 v0, 8
|
||||
; GFX950-GISEL-NEXT: buffer_load_dwordx4 v0, s[0:3], 0 idxen lds
|
||||
; GFX950-GISEL-NEXT: buffer_load_dwordx4 v0, s[0:3], 0 idxen offset:4 sc0 lds
|
||||
; GFX950-GISEL-NEXT: buffer_load_dwordx4 v0, s[0:3], 0 idxen offset:8 nt lds
|
||||
; GFX950-GISEL-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX950-GISEL-NEXT: ds_read_b32 v0, v0
|
||||
; GFX950-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: ; return to shader part epilog
|
||||
call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 16, i32 8, i32 0, i32 0, i32 0, i32 0)
|
||||
call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 16, i32 8, i32 0, i32 0, i32 4, i32 1)
|
||||
call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 16, i32 8, i32 0, i32 0, i32 8, i32 2)
|
||||
%res = load float, ptr addrspace(3) %lds
|
||||
ret float %res
|
||||
}
|
||||
|
||||
define amdgpu_ps void @buffer_load_lds_dwordx4_imm_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %vindex) {
|
||||
; GFX950-LABEL: buffer_load_lds_dwordx4_imm_offset:
|
||||
; GFX950: ; %bb.0:
|
||||
; GFX950-NEXT: s_mov_b32 m0, s4
|
||||
; GFX950-NEXT: s_nop 0
|
||||
; GFX950-NEXT: buffer_load_dwordx4 v0, s[0:3], 0 idxen offset:2048 lds
|
||||
; GFX950-NEXT: s_endpgm
|
||||
call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 16, i32 %vindex, i32 0, i32 0, i32 2048, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_ps void @buffer_load_lds_dwordx4_v_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %vindex, i32 %voffset) {
|
||||
; GFX950-LABEL: buffer_load_lds_dwordx4_v_offset:
|
||||
; GFX950: ; %bb.0:
|
||||
; GFX950-NEXT: s_mov_b32 m0, s4
|
||||
; GFX950-NEXT: s_nop 0
|
||||
; GFX950-NEXT: buffer_load_dwordx4 v[0:1], s[0:3], 0 idxen offen lds
|
||||
; GFX950-NEXT: s_endpgm
|
||||
call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 16, i32 %vindex, i32 %voffset, i32 0, i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_ps void @buffer_load_lds_dwordx4_s_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %vindex, i32 inreg %soffset) {
|
||||
; GFX950-LABEL: buffer_load_lds_dwordx4_s_offset:
|
||||
; GFX950: ; %bb.0:
|
||||
; GFX950-NEXT: s_mov_b32 m0, s4
|
||||
; GFX950-NEXT: s_nop 0
|
||||
; GFX950-NEXT: buffer_load_dwordx4 v0, s[0:3], s5 idxen lds
|
||||
; GFX950-NEXT: s_endpgm
|
||||
call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 16, i32 %vindex, i32 0, i32 %soffset, i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_ps void @buffer_load_lds_dwordx4_vs_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
|
||||
; GFX950-LABEL: buffer_load_lds_dwordx4_vs_offset:
|
||||
; GFX950: ; %bb.0:
|
||||
; GFX950-NEXT: s_mov_b32 m0, s4
|
||||
; GFX950-NEXT: s_nop 0
|
||||
; GFX950-NEXT: buffer_load_dwordx4 v[0:1], s[0:3], s5 idxen offen lds
|
||||
; GFX950-NEXT: s_endpgm
|
||||
call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 16, i32 %vindex, i32 %voffset, i32 %soffset, i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_ps void @buffer_load_lds_dwordx4_vs_imm_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
|
||||
; GFX950-LABEL: buffer_load_lds_dwordx4_vs_imm_offset:
|
||||
; GFX950: ; %bb.0:
|
||||
; GFX950-NEXT: s_mov_b32 m0, s4
|
||||
; GFX950-NEXT: s_nop 0
|
||||
; GFX950-NEXT: buffer_load_dwordx4 v[0:1], s[0:3], s5 idxen offen offset:2048 lds
|
||||
; GFX950-NEXT: s_endpgm
|
||||
call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 16, i32 %vindex, i32 %voffset, i32 %soffset, i32 2048, i32 0)
|
||||
ret void
|
||||
}
|
||||
32
llvm/test/MC/AMDGPU/mubuf-gfx950.s
Normal file
32
llvm/test/MC/AMDGPU/mubuf-gfx950.s
Normal file
@ -0,0 +1,32 @@
|
||||
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx950 -show-encoding %s | FileCheck -check-prefix=GFX950 %s
|
||||
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx940 -show-encoding %s 2>&1 | FileCheck -check-prefix=ERR %s
|
||||
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx803 -show-encoding %s 2>&1 | FileCheck -check-prefix=ERR %s
|
||||
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1030 -show-encoding %s 2>&1 | FileCheck -check-prefix=ERR %s
|
||||
|
||||
// FIXME: Bad diagnostics on unsupported subtarget
|
||||
|
||||
// GFX950: buffer_load_dwordx3 off, s[8:11], s3 lds ; encoding: [0x00,0x00,0x59,0xe0,0x00,0x00,0x02,0x03]
|
||||
// ERR: :[[@LINE+1]]:21: error: invalid operand for instruction
|
||||
buffer_load_dwordx3 off, s[8:11], s3 lds
|
||||
|
||||
// GFX950: buffer_load_dwordx3 off, s[8:11], s3 offset:4095 lds ; encoding: [0xff,0x0f,0x59,0xe0,0x00,0x00,0x02,0x03]
|
||||
// ERR: :[[@LINE+1]]:38: error: not a valid operand
|
||||
buffer_load_dwordx3 off, s[8:11], s3 offset:4095 lds
|
||||
|
||||
// GFX950: buffer_load_dwordx3 v0, s[8:11], s101 offen lds ; encoding: [0x00,0x10,0x59,0xe0,0x00,0x00,0x02,0x65]
|
||||
// ERR: :[[@LINE+1]]:39: error: invalid operand for instruction
|
||||
buffer_load_dwordx3 v0, s[8:11], s101 offen lds
|
||||
|
||||
|
||||
|
||||
// GFX950: buffer_load_dwordx4 off, s[8:11], s3 lds ; encoding: [0x00,0x00,0x5d,0xe0,0x00,0x00,0x02,0x03]
|
||||
// ERR: :[[@LINE+1]]:21: error: invalid operand for instruction
|
||||
buffer_load_dwordx4 off, s[8:11], s3 lds
|
||||
|
||||
// GFX950: buffer_load_dwordx4 off, s[8:11], s3 offset:4095 lds ; encoding: [0xff,0x0f,0x5d,0xe0,0x00,0x00,0x02,0x03]
|
||||
// ERR: :[[@LINE+1]]:38: error: not a valid operand
|
||||
buffer_load_dwordx4 off, s[8:11], s3 offset:4095 lds
|
||||
|
||||
// GFX950: buffer_load_dwordx4 v0, s[8:11], s101 offen lds ; encoding: [0x00,0x10,0x5d,0xe0,0x00,0x00,0x02,0x65]
|
||||
// ERR: :[[@LINE+1]]:39: error: invalid operand for instruction
|
||||
buffer_load_dwordx4 v0, s[8:11], s101 offen lds
|
||||
@ -23,3 +23,22 @@
|
||||
|
||||
# GFX950: global_load_lds_dwordx4 v[2:3], off sc0 nt sc1 ; encoding: [0x00,0x80,0xf7,0xdf,0x02,0x00,0x7f,0x00]
|
||||
0x00,0x80,0xf7,0xdf,0x02,0x00,0x7f,0x00
|
||||
|
||||
|
||||
# GFX950: buffer_load_dwordx3 off, s[8:11], s3 lds ; encoding: [0x00,0x00,0x59,0xe0,0x00,0x00,0x02,0x03]
|
||||
0x00,0x00,0x59,0xe0,0x00,0x00,0x02,0x03
|
||||
|
||||
# GFX950: buffer_load_dwordx3 off, s[8:11], s3 offset:4095 lds ; encoding: [0xff,0x0f,0x59,0xe0,0x00,0x00,0x02,0x03]
|
||||
0xff,0x0f,0x59,0xe0,0x00,0x00,0x02,0x03
|
||||
|
||||
# GFX950: buffer_load_dwordx3 v0, s[8:11], s101 offen lds ; encoding: [0x00,0x10,0x59,0xe0,0x00,0x00,0x02,0x65]
|
||||
0x00,0x10,0x59,0xe0,0x00,0x00,0x02,0x65
|
||||
|
||||
# GFX950: buffer_load_dwordx4 off, s[8:11], s3 lds ; encoding: [0x00,0x00,0x5d,0xe0,0x00,0x00,0x02,0x03]
|
||||
0x00,0x00,0x5d,0xe0,0x00,0x00,0x02,0x03
|
||||
|
||||
# GFX950: buffer_load_dwordx4 off, s[8:11], s3 offset:4095 lds ; encoding: [0xff,0x0f,0x5d,0xe0,0x00,0x00,0x02,0x03]
|
||||
0xff,0x0f,0x5d,0xe0,0x00,0x00,0x02,0x03
|
||||
|
||||
# GFX950: buffer_load_dwordx4 v0, s[8:11], s101 offen lds ; encoding: [0x00,0x10,0x5d,0xe0,0x00,0x00,0x02,0x65]
|
||||
0x00,0x10,0x5d,0xe0,0x00,0x00,0x02,0x65
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user