[AMDGPU][GlobalISel] Add buffer store byte/short RegBankLegalize rules (#179367)
This commit is contained in:
parent
8283972837
commit
e0c2cc7ed0
@ -269,7 +269,8 @@ UniformityLLTOpPredicateID LLTToBId(LLT Ty) {
|
||||
return B64;
|
||||
if (Ty == LLT::fixed_vector(3, 32))
|
||||
return B96;
|
||||
if (Ty == LLT::fixed_vector(4, 32) || isAnyPtr(Ty, 128))
|
||||
if (Ty == LLT::fixed_vector(4, 32) || Ty == LLT::fixed_vector(2, 64) ||
|
||||
Ty == LLT::fixed_vector(8, 16) || isAnyPtr(Ty, 128))
|
||||
return B128;
|
||||
return _;
|
||||
}
|
||||
@ -1022,7 +1023,8 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
|
||||
.Any(
|
||||
{{DivB128}, {{VgprB128}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}});
|
||||
|
||||
addRulesForGOpcs({G_AMDGPU_BUFFER_STORE, G_AMDGPU_BUFFER_STORE_FORMAT,
|
||||
addRulesForGOpcs({G_AMDGPU_BUFFER_STORE, G_AMDGPU_BUFFER_STORE_BYTE,
|
||||
G_AMDGPU_BUFFER_STORE_SHORT, G_AMDGPU_BUFFER_STORE_FORMAT,
|
||||
G_AMDGPU_BUFFER_STORE_FORMAT_D16,
|
||||
G_AMDGPU_TBUFFER_STORE_FORMAT,
|
||||
G_AMDGPU_TBUFFER_STORE_FORMAT_D16})
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX8 %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 -stop-after=instruction-select -o - %s | FileCheck -check-prefixes=GFX12,GFX1200 %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1250 -stop-after=instruction-select -o - %s | FileCheck -check-prefixes=GFX12,GFX1250 %s
|
||||
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX8 %s
|
||||
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 -stop-after=instruction-select -o - %s | FileCheck -check-prefixes=GFX12,GFX1200 %s
|
||||
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1250 -stop-after=instruction-select -o - %s | FileCheck -check-prefixes=GFX12,GFX1250 %s
|
||||
; FIXME: Test with SI when argument lowering not broken for f16
|
||||
|
||||
; Natural mapping
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=instruction-select -o - %s | FileCheck %s
|
||||
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=instruction-select -o - %s | FileCheck %s
|
||||
; FIXME: Test with SI when argument lowering not broken for f16
|
||||
|
||||
; Natural mapping
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX8 %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX1200 %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1250 -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX1250 %s
|
||||
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX8 %s
|
||||
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX1200 %s
|
||||
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1250 -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX1250 %s
|
||||
|
||||
; Natural mapping
|
||||
define amdgpu_ps void @struct_buffer_store_f32_sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset(float %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=instruction-select -o - %s | FileCheck %s
|
||||
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=instruction-select -o - %s | FileCheck %s
|
||||
|
||||
; Natural mapping
|
||||
define amdgpu_ps void @struct_ptr_buffer_store_f32_sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset(float %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
||||
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=SDAG %s
|
||||
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -global-isel=1 -global-isel-abort=2 < %s | FileCheck -check-prefix=GISEL %s
|
||||
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -global-isel=1 -new-reg-bank-select -global-isel-abort=2 < %s | FileCheck -check-prefix=GISEL %s
|
||||
|
||||
; Note: if you're adding tests here, also add them to
|
||||
; lower-buffer-fat-pointers-contents-legalization.ll to verify the IR produced by
|
||||
@ -173,6 +173,14 @@ define i128 @load_i128(ptr addrspace(8) inreg %buf) {
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GISEL-NEXT: buffer_load_dwordx4 v[0:3], off, s[16:19], 0
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s4, v0
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s5, v1
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s6, v2
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s7, v3
|
||||
; GISEL-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GISEL-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GISEL-NEXT: v_mov_b32_e32 v2, s6
|
||||
; GISEL-NEXT: v_mov_b32_e32 v3, s7
|
||||
; GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
%p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
|
||||
%ret = load i128, ptr addrspace(7) %p
|
||||
@ -439,6 +447,14 @@ define <8 x i16> @load_v8i16(ptr addrspace(8) inreg %buf) {
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GISEL-NEXT: buffer_load_dwordx4 v[0:3], off, s[16:19], 0
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s4, v0
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s5, v1
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s6, v2
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s7, v3
|
||||
; GISEL-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GISEL-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GISEL-NEXT: v_mov_b32_e32 v2, s6
|
||||
; GISEL-NEXT: v_mov_b32_e32 v3, s7
|
||||
; GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
%p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
|
||||
%ret = load <8 x i16>, ptr addrspace(7) %p
|
||||
@ -477,6 +493,14 @@ define <2 x i64> @load_v2i64(ptr addrspace(8) inreg %buf) {
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GISEL-NEXT: buffer_load_dwordx4 v[0:3], off, s[16:19], 0
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s4, v0
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s5, v1
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s6, v2
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s7, v3
|
||||
; GISEL-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GISEL-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GISEL-NEXT: v_mov_b32_e32 v2, s6
|
||||
; GISEL-NEXT: v_mov_b32_e32 v3, s7
|
||||
; GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
%p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
|
||||
%ret = load <2 x i64>, ptr addrspace(7) %p
|
||||
@ -667,6 +691,14 @@ define <8 x half> @load_v8f16(ptr addrspace(8) inreg %buf) {
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GISEL-NEXT: buffer_load_dwordx4 v[0:3], off, s[16:19], 0
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s4, v0
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s5, v1
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s6, v2
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s7, v3
|
||||
; GISEL-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GISEL-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GISEL-NEXT: v_mov_b32_e32 v2, s6
|
||||
; GISEL-NEXT: v_mov_b32_e32 v3, s7
|
||||
; GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
%p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
|
||||
%ret = load <8 x half>, ptr addrspace(7) %p
|
||||
@ -1161,6 +1193,14 @@ define <2 x ptr addrspace(1)> @load_v2p1(ptr addrspace(8) inreg %buf) {
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GISEL-NEXT: buffer_load_dwordx4 v[0:3], off, s[16:19], 0
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s4, v0
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s5, v1
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s6, v2
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s7, v3
|
||||
; GISEL-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GISEL-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GISEL-NEXT: v_mov_b32_e32 v2, s6
|
||||
; GISEL-NEXT: v_mov_b32_e32 v3, s7
|
||||
; GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
%p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
|
||||
%ret = load <2 x ptr addrspace(1)>, ptr addrspace(7) %p
|
||||
@ -1199,6 +1239,10 @@ define <2 x ptr addrspace(5)> @load_v2p5(ptr addrspace(8) inreg %buf) {
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GISEL-NEXT: buffer_load_dwordx2 v[0:1], off, s[16:19], 0
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s4, v0
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s5, v1
|
||||
; GISEL-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GISEL-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
%p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
|
||||
%ret = load <2 x ptr addrspace(5)>, ptr addrspace(7) %p
|
||||
@ -1237,6 +1281,12 @@ define <3 x ptr addrspace(5)> @load_v3p5(ptr addrspace(8) inreg %buf) {
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GISEL-NEXT: buffer_load_dwordx3 v[0:2], off, s[16:19], 0
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s4, v0
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s5, v1
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s6, v2
|
||||
; GISEL-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GISEL-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GISEL-NEXT: v_mov_b32_e32 v2, s6
|
||||
; GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
%p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
|
||||
%ret = load <3 x ptr addrspace(5)>, ptr addrspace(7) %p
|
||||
@ -1275,6 +1325,14 @@ define <4 x ptr addrspace(5)> @load_v4p5(ptr addrspace(8) inreg %buf) {
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GISEL-NEXT: buffer_load_dwordx4 v[0:3], off, s[16:19], 0
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s4, v0
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s5, v1
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s6, v2
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s7, v3
|
||||
; GISEL-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GISEL-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GISEL-NEXT: v_mov_b32_e32 v2, s6
|
||||
; GISEL-NEXT: v_mov_b32_e32 v3, s7
|
||||
; GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
%p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
|
||||
%ret = load <4 x ptr addrspace(5)>, ptr addrspace(7) %p
|
||||
@ -1315,6 +1373,12 @@ define <6 x half> @load_v6f16(ptr addrspace(8) inreg %buf) {
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GISEL-NEXT: buffer_load_dwordx3 v[0:2], off, s[16:19], 0
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s4, v0
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s5, v1
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s6, v2
|
||||
; GISEL-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GISEL-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GISEL-NEXT: v_mov_b32_e32 v2, s6
|
||||
; GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
%p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
|
||||
%ret = load <6 x half>, ptr addrspace(7) %p
|
||||
@ -1612,7 +1676,24 @@ define <4 x ptr addrspace(1)> @load_v4p1(ptr addrspace(8) inreg %buf) {
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GISEL-NEXT: buffer_load_dwordx4 v[0:3], off, s[16:19], 0
|
||||
; GISEL-NEXT: buffer_load_dwordx4 v[4:7], off, s[16:19], 0 offset:16
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(1)
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s4, v0
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s5, v1
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s6, v2
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s7, v3
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s8, v4
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s9, v5
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s10, v6
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s11, v7
|
||||
; GISEL-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GISEL-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GISEL-NEXT: v_mov_b32_e32 v2, s6
|
||||
; GISEL-NEXT: v_mov_b32_e32 v3, s7
|
||||
; GISEL-NEXT: v_mov_b32_e32 v4, s8
|
||||
; GISEL-NEXT: v_mov_b32_e32 v5, s9
|
||||
; GISEL-NEXT: v_mov_b32_e32 v6, s10
|
||||
; GISEL-NEXT: v_mov_b32_e32 v7, s11
|
||||
; GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
%p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
|
||||
%ret = load <4 x ptr addrspace(1)>, ptr addrspace(7) %p
|
||||
@ -1655,6 +1736,8 @@ define <1 x i16> @load_v1i16(ptr addrspace(8) inreg %buf) {
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GISEL-NEXT: buffer_load_ushort v0, off, s[16:19], 0
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s4, v0
|
||||
; GISEL-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
%p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
|
||||
%ret = load <1 x i16>, ptr addrspace(7) %p
|
||||
@ -1692,8 +1775,11 @@ define <3 x i16> @load_v3i16(ptr addrspace(8) inreg %buf) {
|
||||
; GISEL-LABEL: load_v3i16:
|
||||
; GISEL: ; %bb.0:
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GISEL-NEXT: buffer_load_dword v0, off, s[16:19], 0
|
||||
; GISEL-NEXT: buffer_load_ushort v1, off, s[16:19], 0 offset:4
|
||||
; GISEL-NEXT: buffer_load_dword v0, off, s[16:19], 0
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(1)
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s4, v1
|
||||
; GISEL-NEXT: v_mov_b32_e32 v1, s4
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
%p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
|
||||
@ -1734,8 +1820,11 @@ define <5 x i16> @load_v5i16(ptr addrspace(8) inreg %buf) {
|
||||
; GISEL-LABEL: load_v5i16:
|
||||
; GISEL: ; %bb.0:
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GISEL-NEXT: buffer_load_dwordx2 v[0:1], off, s[16:19], 0
|
||||
; GISEL-NEXT: buffer_load_ushort v2, off, s[16:19], 0 offset:8
|
||||
; GISEL-NEXT: buffer_load_dwordx2 v[0:1], off, s[16:19], 0
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(1)
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s4, v2
|
||||
; GISEL-NEXT: v_mov_b32_e32 v2, s4
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
%p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
|
||||
@ -1777,6 +1866,12 @@ define <6 x i16> @load_v6i16(ptr addrspace(8) inreg %buf) {
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GISEL-NEXT: buffer_load_dwordx3 v[0:2], off, s[16:19], 0
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s4, v0
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s5, v1
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s6, v2
|
||||
; GISEL-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GISEL-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GISEL-NEXT: v_mov_b32_e32 v2, s6
|
||||
; GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
%p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
|
||||
%ret = load <6 x i16>, ptr addrspace(7) %p
|
||||
@ -1816,7 +1911,16 @@ define <7 x i16> @load_v7i16(ptr addrspace(8) inreg %buf) {
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GISEL-NEXT: buffer_load_dwordx3 v[0:2], off, s[16:19], 0
|
||||
; GISEL-NEXT: buffer_load_ushort v3, off, s[16:19], 0 offset:12
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(1)
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s4, v0
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s5, v1
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s6, v2
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s7, v3
|
||||
; GISEL-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GISEL-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GISEL-NEXT: v_mov_b32_e32 v2, s6
|
||||
; GISEL-NEXT: v_mov_b32_e32 v3, s7
|
||||
; GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
%p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
|
||||
%ret = load <7 x i16>, ptr addrspace(7) %p
|
||||
@ -1858,7 +1962,18 @@ define <9 x i16> @load_v9i16(ptr addrspace(8) inreg %buf) {
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GISEL-NEXT: buffer_load_dwordx4 v[0:3], off, s[16:19], 0
|
||||
; GISEL-NEXT: buffer_load_ushort v4, off, s[16:19], 0 offset:16
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(1)
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s4, v0
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s5, v1
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s6, v2
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s7, v3
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s8, v4
|
||||
; GISEL-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GISEL-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GISEL-NEXT: v_mov_b32_e32 v2, s6
|
||||
; GISEL-NEXT: v_mov_b32_e32 v3, s7
|
||||
; GISEL-NEXT: v_mov_b32_e32 v4, s8
|
||||
; GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
%p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
|
||||
%ret = load <9 x i16>, ptr addrspace(7) %p
|
||||
@ -1942,7 +2057,9 @@ define <2 x i8> @load_v2i8(ptr addrspace(8) inreg %buf) {
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GISEL-NEXT: buffer_load_ushort v0, off, s[16:19], 0
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s4, v0
|
||||
; GISEL-NEXT: s_lshr_b32 s4, s4, 8
|
||||
; GISEL-NEXT: v_mov_b32_e32 v1, s4
|
||||
; GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
%p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
|
||||
%ret = load <2 x i8>, ptr addrspace(7) %p
|
||||
@ -1990,7 +2107,9 @@ define <3 x i8> @load_v3i8(ptr addrspace(8) inreg %buf) {
|
||||
; GISEL-NEXT: buffer_load_ushort v0, off, s[16:19], 0
|
||||
; GISEL-NEXT: buffer_load_ubyte v2, off, s[16:19], 0 offset:2
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(1)
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s4, v0
|
||||
; GISEL-NEXT: s_lshr_b32 s4, s4, 8
|
||||
; GISEL-NEXT: v_mov_b32_e32 v1, s4
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
%p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
|
||||
@ -2040,9 +2159,13 @@ define <4 x i8> @load_v4i8(ptr addrspace(8) inreg %buf) {
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GISEL-NEXT: buffer_load_dword v0, off, s[16:19], 0
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v3, 24, v0
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s4, v0
|
||||
; GISEL-NEXT: s_lshr_b32 s5, s4, 8
|
||||
; GISEL-NEXT: s_lshr_b32 s6, s4, 16
|
||||
; GISEL-NEXT: s_lshr_b32 s4, s4, 24
|
||||
; GISEL-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GISEL-NEXT: v_mov_b32_e32 v2, s6
|
||||
; GISEL-NEXT: v_mov_b32_e32 v3, s4
|
||||
; GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
%p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
|
||||
%ret = load <4 x i8>, ptr addrspace(7) %p
|
||||
@ -2100,9 +2223,13 @@ define <5 x i8> @load_v5i8(ptr addrspace(8) inreg %buf) {
|
||||
; GISEL-NEXT: buffer_load_dword v0, off, s[16:19], 0
|
||||
; GISEL-NEXT: buffer_load_ubyte v4, off, s[16:19], 0 offset:4
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(1)
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v3, 24, v0
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s4, v0
|
||||
; GISEL-NEXT: s_lshr_b32 s5, s4, 8
|
||||
; GISEL-NEXT: s_lshr_b32 s6, s4, 16
|
||||
; GISEL-NEXT: s_lshr_b32 s4, s4, 24
|
||||
; GISEL-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GISEL-NEXT: v_mov_b32_e32 v2, s6
|
||||
; GISEL-NEXT: v_mov_b32_e32 v3, s4
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
%p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
|
||||
@ -2167,11 +2294,17 @@ define <6 x i8> @load_v6i8(ptr addrspace(8) inreg %buf) {
|
||||
; GISEL-NEXT: buffer_load_dword v0, off, s[16:19], 0
|
||||
; GISEL-NEXT: buffer_load_ushort v4, off, s[16:19], 0 offset:4
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(1)
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v3, 24, v0
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s4, v0
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v5, 8, v4
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s5, v4
|
||||
; GISEL-NEXT: s_lshr_b32 s6, s4, 8
|
||||
; GISEL-NEXT: s_lshr_b32 s7, s4, 16
|
||||
; GISEL-NEXT: s_lshr_b32 s4, s4, 24
|
||||
; GISEL-NEXT: s_lshr_b32 s5, s5, 8
|
||||
; GISEL-NEXT: v_mov_b32_e32 v1, s6
|
||||
; GISEL-NEXT: v_mov_b32_e32 v2, s7
|
||||
; GISEL-NEXT: v_mov_b32_e32 v3, s4
|
||||
; GISEL-NEXT: v_mov_b32_e32 v5, s5
|
||||
; GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
%p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
|
||||
%ret = load <6 x i8>, ptr addrspace(7) %p
|
||||
@ -2238,11 +2371,17 @@ define <7 x i8> @load_v7i8(ptr addrspace(8) inreg %buf) {
|
||||
; GISEL-NEXT: buffer_load_ushort v4, off, s[16:19], 0 offset:4
|
||||
; GISEL-NEXT: buffer_load_ubyte v6, off, s[16:19], 0 offset:6
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(2)
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v3, 24, v0
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s4, v0
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(1)
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v5, 8, v4
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s5, v4
|
||||
; GISEL-NEXT: s_lshr_b32 s6, s4, 8
|
||||
; GISEL-NEXT: s_lshr_b32 s7, s4, 16
|
||||
; GISEL-NEXT: s_lshr_b32 s4, s4, 24
|
||||
; GISEL-NEXT: s_lshr_b32 s5, s5, 8
|
||||
; GISEL-NEXT: v_mov_b32_e32 v1, s6
|
||||
; GISEL-NEXT: v_mov_b32_e32 v2, s7
|
||||
; GISEL-NEXT: v_mov_b32_e32 v3, s4
|
||||
; GISEL-NEXT: v_mov_b32_e32 v5, s5
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
%p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
|
||||
@ -2311,14 +2450,21 @@ define <8 x i8> @load_v8i8(ptr addrspace(8) inreg %buf) {
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GISEL-NEXT: buffer_load_dwordx2 v[0:1], off, s[16:19], 0
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v8, 8, v0
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v3, 24, v0
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v5, 8, v1
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v6, 16, v1
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v7, 24, v1
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s4, v0
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s5, v1
|
||||
; GISEL-NEXT: s_lshr_b32 s6, s4, 8
|
||||
; GISEL-NEXT: s_lshr_b32 s7, s4, 16
|
||||
; GISEL-NEXT: s_lshr_b32 s4, s4, 24
|
||||
; GISEL-NEXT: s_lshr_b32 s8, s5, 8
|
||||
; GISEL-NEXT: s_lshr_b32 s9, s5, 16
|
||||
; GISEL-NEXT: s_lshr_b32 s5, s5, 24
|
||||
; GISEL-NEXT: v_mov_b32_e32 v4, v1
|
||||
; GISEL-NEXT: v_mov_b32_e32 v1, v8
|
||||
; GISEL-NEXT: v_mov_b32_e32 v1, s6
|
||||
; GISEL-NEXT: v_mov_b32_e32 v2, s7
|
||||
; GISEL-NEXT: v_mov_b32_e32 v3, s4
|
||||
; GISEL-NEXT: v_mov_b32_e32 v5, s8
|
||||
; GISEL-NEXT: v_mov_b32_e32 v6, s9
|
||||
; GISEL-NEXT: v_mov_b32_e32 v7, s5
|
||||
; GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
%p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
|
||||
%ret = load <8 x i8>, ptr addrspace(7) %p
|
||||
@ -2393,19 +2539,29 @@ define <12 x i8> @load_v12i8(ptr addrspace(8) inreg %buf) {
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GISEL-NEXT: buffer_load_dwordx3 v[0:2], off, s[16:19], 0
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v13, 8, v0
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v12, 16, v0
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v3, 24, v0
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v5, 8, v1
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v6, 16, v1
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v7, 24, v1
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v9, 8, v2
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v10, 16, v2
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v11, 24, v2
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s4, v0
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s5, v1
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s6, v2
|
||||
; GISEL-NEXT: s_lshr_b32 s7, s4, 8
|
||||
; GISEL-NEXT: s_lshr_b32 s8, s4, 16
|
||||
; GISEL-NEXT: s_lshr_b32 s4, s4, 24
|
||||
; GISEL-NEXT: s_lshr_b32 s9, s5, 8
|
||||
; GISEL-NEXT: s_lshr_b32 s10, s5, 16
|
||||
; GISEL-NEXT: s_lshr_b32 s5, s5, 24
|
||||
; GISEL-NEXT: s_lshr_b32 s11, s6, 8
|
||||
; GISEL-NEXT: s_lshr_b32 s12, s6, 16
|
||||
; GISEL-NEXT: s_lshr_b32 s6, s6, 24
|
||||
; GISEL-NEXT: v_mov_b32_e32 v4, v1
|
||||
; GISEL-NEXT: v_mov_b32_e32 v8, v2
|
||||
; GISEL-NEXT: v_mov_b32_e32 v1, v13
|
||||
; GISEL-NEXT: v_mov_b32_e32 v2, v12
|
||||
; GISEL-NEXT: v_mov_b32_e32 v1, s7
|
||||
; GISEL-NEXT: v_mov_b32_e32 v2, s8
|
||||
; GISEL-NEXT: v_mov_b32_e32 v3, s4
|
||||
; GISEL-NEXT: v_mov_b32_e32 v5, s9
|
||||
; GISEL-NEXT: v_mov_b32_e32 v6, s10
|
||||
; GISEL-NEXT: v_mov_b32_e32 v7, s5
|
||||
; GISEL-NEXT: v_mov_b32_e32 v9, s11
|
||||
; GISEL-NEXT: v_mov_b32_e32 v10, s12
|
||||
; GISEL-NEXT: v_mov_b32_e32 v11, s6
|
||||
; GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
%p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
|
||||
%ret = load <12 x i8>, ptr addrspace(7) %p
|
||||
@ -2495,24 +2651,37 @@ define <16 x i8> @load_v16i8(ptr addrspace(8) inreg %buf) {
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GISEL-NEXT: buffer_load_dwordx4 v[0:3], off, s[16:19], 0
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v16, 8, v0
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v17, 16, v0
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v18, 24, v0
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v5, 8, v1
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v6, 16, v1
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v7, 24, v1
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v9, 8, v2
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v10, 16, v2
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v11, 24, v2
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v13, 8, v3
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v14, 16, v3
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v15, 24, v3
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s4, v0
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s5, v1
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s6, v2
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s7, v3
|
||||
; GISEL-NEXT: s_lshr_b32 s8, s4, 8
|
||||
; GISEL-NEXT: s_lshr_b32 s9, s4, 16
|
||||
; GISEL-NEXT: s_lshr_b32 s4, s4, 24
|
||||
; GISEL-NEXT: s_lshr_b32 s10, s5, 8
|
||||
; GISEL-NEXT: s_lshr_b32 s11, s5, 16
|
||||
; GISEL-NEXT: s_lshr_b32 s5, s5, 24
|
||||
; GISEL-NEXT: s_lshr_b32 s12, s6, 8
|
||||
; GISEL-NEXT: s_lshr_b32 s13, s6, 16
|
||||
; GISEL-NEXT: s_lshr_b32 s6, s6, 24
|
||||
; GISEL-NEXT: s_lshr_b32 s14, s7, 8
|
||||
; GISEL-NEXT: s_lshr_b32 s15, s7, 16
|
||||
; GISEL-NEXT: s_lshr_b32 s7, s7, 24
|
||||
; GISEL-NEXT: v_mov_b32_e32 v4, v1
|
||||
; GISEL-NEXT: v_mov_b32_e32 v8, v2
|
||||
; GISEL-NEXT: v_mov_b32_e32 v12, v3
|
||||
; GISEL-NEXT: v_mov_b32_e32 v1, v16
|
||||
; GISEL-NEXT: v_mov_b32_e32 v2, v17
|
||||
; GISEL-NEXT: v_mov_b32_e32 v3, v18
|
||||
; GISEL-NEXT: v_mov_b32_e32 v1, s8
|
||||
; GISEL-NEXT: v_mov_b32_e32 v2, s9
|
||||
; GISEL-NEXT: v_mov_b32_e32 v3, s4
|
||||
; GISEL-NEXT: v_mov_b32_e32 v5, s10
|
||||
; GISEL-NEXT: v_mov_b32_e32 v6, s11
|
||||
; GISEL-NEXT: v_mov_b32_e32 v7, s5
|
||||
; GISEL-NEXT: v_mov_b32_e32 v9, s12
|
||||
; GISEL-NEXT: v_mov_b32_e32 v10, s13
|
||||
; GISEL-NEXT: v_mov_b32_e32 v11, s6
|
||||
; GISEL-NEXT: v_mov_b32_e32 v13, s14
|
||||
; GISEL-NEXT: v_mov_b32_e32 v14, s15
|
||||
; GISEL-NEXT: v_mov_b32_e32 v15, s7
|
||||
; GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
%p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
|
||||
%ret = load <16 x i8>, ptr addrspace(7) %p
|
||||
@ -2629,43 +2798,69 @@ define <32 x i8> @load_v32i8(ptr addrspace(8) inreg %buf) {
|
||||
; GISEL-NEXT: buffer_load_dwordx4 v[0:3], off, s[16:19], 0
|
||||
; GISEL-NEXT: buffer_load_dwordx4 v[16:19], off, s[16:19], 0 offset:16
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(1)
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v35, 8, v0
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v36, 16, v0
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v37, 24, v0
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s4, v0
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s5, v1
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s6, v2
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s7, v3
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v32, 8, v16
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v33, 16, v16
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v34, 24, v16
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v5, 8, v1
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v6, 16, v1
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v7, 24, v1
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v9, 8, v2
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v10, 16, v2
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v11, 24, v2
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v13, 8, v3
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v14, 16, v3
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v15, 24, v3
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v21, 8, v17
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v22, 16, v17
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v23, 24, v17
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v25, 8, v18
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v26, 16, v18
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v27, 24, v18
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v29, 8, v19
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v30, 16, v19
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v31, 24, v19
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s8, v16
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s9, v17
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s10, v18
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s11, v19
|
||||
; GISEL-NEXT: s_lshr_b32 s12, s4, 8
|
||||
; GISEL-NEXT: s_lshr_b32 s13, s4, 16
|
||||
; GISEL-NEXT: s_lshr_b32 s4, s4, 24
|
||||
; GISEL-NEXT: s_lshr_b32 s14, s5, 8
|
||||
; GISEL-NEXT: s_lshr_b32 s15, s5, 16
|
||||
; GISEL-NEXT: s_lshr_b32 s5, s5, 24
|
||||
; GISEL-NEXT: s_lshr_b32 s16, s6, 8
|
||||
; GISEL-NEXT: s_lshr_b32 s17, s6, 16
|
||||
; GISEL-NEXT: s_lshr_b32 s6, s6, 24
|
||||
; GISEL-NEXT: s_lshr_b32 s18, s7, 8
|
||||
; GISEL-NEXT: s_lshr_b32 s19, s7, 16
|
||||
; GISEL-NEXT: s_lshr_b32 s7, s7, 24
|
||||
; GISEL-NEXT: s_lshr_b32 s20, s8, 8
|
||||
; GISEL-NEXT: s_lshr_b32 s21, s8, 16
|
||||
; GISEL-NEXT: s_lshr_b32 s8, s8, 24
|
||||
; GISEL-NEXT: s_lshr_b32 s22, s9, 8
|
||||
; GISEL-NEXT: s_lshr_b32 s23, s9, 16
|
||||
; GISEL-NEXT: s_lshr_b32 s9, s9, 24
|
||||
; GISEL-NEXT: s_lshr_b32 s24, s10, 8
|
||||
; GISEL-NEXT: s_lshr_b32 s25, s10, 16
|
||||
; GISEL-NEXT: s_lshr_b32 s10, s10, 24
|
||||
; GISEL-NEXT: s_lshr_b32 s26, s11, 8
|
||||
; GISEL-NEXT: s_lshr_b32 s27, s11, 16
|
||||
; GISEL-NEXT: s_lshr_b32 s11, s11, 24
|
||||
; GISEL-NEXT: v_mov_b32_e32 v4, v1
|
||||
; GISEL-NEXT: v_mov_b32_e32 v8, v2
|
||||
; GISEL-NEXT: v_mov_b32_e32 v12, v3
|
||||
; GISEL-NEXT: v_mov_b32_e32 v20, v17
|
||||
; GISEL-NEXT: v_mov_b32_e32 v24, v18
|
||||
; GISEL-NEXT: v_mov_b32_e32 v28, v19
|
||||
; GISEL-NEXT: v_mov_b32_e32 v1, v35
|
||||
; GISEL-NEXT: v_mov_b32_e32 v2, v36
|
||||
; GISEL-NEXT: v_mov_b32_e32 v3, v37
|
||||
; GISEL-NEXT: v_mov_b32_e32 v17, v32
|
||||
; GISEL-NEXT: v_mov_b32_e32 v18, v33
|
||||
; GISEL-NEXT: v_mov_b32_e32 v19, v34
|
||||
; GISEL-NEXT: v_mov_b32_e32 v1, s12
|
||||
; GISEL-NEXT: v_mov_b32_e32 v2, s13
|
||||
; GISEL-NEXT: v_mov_b32_e32 v3, s4
|
||||
; GISEL-NEXT: v_mov_b32_e32 v5, s14
|
||||
; GISEL-NEXT: v_mov_b32_e32 v6, s15
|
||||
; GISEL-NEXT: v_mov_b32_e32 v7, s5
|
||||
; GISEL-NEXT: v_mov_b32_e32 v9, s16
|
||||
; GISEL-NEXT: v_mov_b32_e32 v10, s17
|
||||
; GISEL-NEXT: v_mov_b32_e32 v11, s6
|
||||
; GISEL-NEXT: v_mov_b32_e32 v13, s18
|
||||
; GISEL-NEXT: v_mov_b32_e32 v14, s19
|
||||
; GISEL-NEXT: v_mov_b32_e32 v15, s7
|
||||
; GISEL-NEXT: v_mov_b32_e32 v17, s20
|
||||
; GISEL-NEXT: v_mov_b32_e32 v18, s21
|
||||
; GISEL-NEXT: v_mov_b32_e32 v19, s8
|
||||
; GISEL-NEXT: v_mov_b32_e32 v21, s22
|
||||
; GISEL-NEXT: v_mov_b32_e32 v22, s23
|
||||
; GISEL-NEXT: v_mov_b32_e32 v23, s9
|
||||
; GISEL-NEXT: v_mov_b32_e32 v25, s24
|
||||
; GISEL-NEXT: v_mov_b32_e32 v26, s25
|
||||
; GISEL-NEXT: v_mov_b32_e32 v27, s10
|
||||
; GISEL-NEXT: v_mov_b32_e32 v29, s26
|
||||
; GISEL-NEXT: v_mov_b32_e32 v30, s27
|
||||
; GISEL-NEXT: v_mov_b32_e32 v31, s11
|
||||
; GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
%p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
|
||||
%ret = load <32 x i8>, ptr addrspace(7) %p
|
||||
@ -2871,7 +3066,9 @@ define [2 x half] @load_a2f16(ptr addrspace(8) inreg %buf) {
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GISEL-NEXT: buffer_load_dword v0, off, s[16:19], 0
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s4, v0
|
||||
; GISEL-NEXT: s_lshr_b32 s4, s4, 16
|
||||
; GISEL-NEXT: v_mov_b32_e32 v1, s4
|
||||
; GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
%p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
|
||||
%ret = load [2 x half], ptr addrspace(7) %p
|
||||
@ -2914,6 +3111,14 @@ define [2 x ptr addrspace(1)] @load_a2p1(ptr addrspace(8) inreg %buf) {
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GISEL-NEXT: buffer_load_dwordx4 v[0:3], off, s[16:19], 0
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s4, v0
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s5, v1
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s6, v2
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s7, v3
|
||||
; GISEL-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GISEL-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GISEL-NEXT: v_mov_b32_e32 v2, s6
|
||||
; GISEL-NEXT: v_mov_b32_e32 v3, s7
|
||||
; GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
%p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
|
||||
%ret = load [2 x ptr addrspace(1)], ptr addrspace(7) %p
|
||||
@ -2955,19 +3160,23 @@ define i40 @load_i40(ptr addrspace(8) inreg %buf) {
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GISEL-NEXT: buffer_load_dword v0, off, s[16:19], 0
|
||||
; GISEL-NEXT: buffer_load_ubyte v1, off, s[16:19], 0 offset:4
|
||||
; GISEL-NEXT: v_mov_b32_e32 v2, 0xff
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(1)
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v3, 8, v0
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v4, 24, v0
|
||||
; GISEL-NEXT: v_and_b32_e32 v3, 0xff, v3
|
||||
; GISEL-NEXT: v_and_b32_sdwa v2, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
|
||||
; GISEL-NEXT: v_lshlrev_b16_e32 v4, 8, v4
|
||||
; GISEL-NEXT: v_lshlrev_b16_e32 v3, 8, v3
|
||||
; GISEL-NEXT: v_or_b32_e32 v2, v2, v4
|
||||
; GISEL-NEXT: v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
||||
; GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
|
||||
; GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
||||
; GISEL-NEXT: v_lshl_or_b32 v0, v2, 16, v0
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s4, v0
|
||||
; GISEL-NEXT: s_lshr_b32 s5, s4, 8
|
||||
; GISEL-NEXT: s_lshr_b32 s6, s4, 16
|
||||
; GISEL-NEXT: s_lshr_b32 s7, s4, 24
|
||||
; GISEL-NEXT: s_and_b32 s5, s5, 0xff
|
||||
; GISEL-NEXT: s_and_b32 s6, s6, 0xff
|
||||
; GISEL-NEXT: s_lshl_b32 s7, s7, 8
|
||||
; GISEL-NEXT: s_and_b32 s4, s4, 0xff
|
||||
; GISEL-NEXT: s_lshl_b32 s5, s5, 8
|
||||
; GISEL-NEXT: s_or_b32 s6, s6, s7
|
||||
; GISEL-NEXT: s_or_b32 s4, s4, s5
|
||||
; GISEL-NEXT: s_and_b32 s5, 0xffff, s6
|
||||
; GISEL-NEXT: s_and_b32 s4, 0xffff, s4
|
||||
; GISEL-NEXT: s_lshl_b32 s5, s5, 16
|
||||
; GISEL-NEXT: s_or_b32 s4, s4, s5
|
||||
; GISEL-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
%p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
|
||||
@ -3009,6 +3218,12 @@ define i96 @load_i96(ptr addrspace(8) inreg %buf) {
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GISEL-NEXT: buffer_load_dwordx3 v[0:2], off, s[16:19], 0
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s4, v0
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s5, v1
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s6, v2
|
||||
; GISEL-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GISEL-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GISEL-NEXT: v_mov_b32_e32 v2, s6
|
||||
; GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
%p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
|
||||
%ret = load i96, ptr addrspace(7) %p
|
||||
@ -3221,7 +3436,9 @@ define <2 x i4> @load_v2i4(ptr addrspace(8) inreg %buf) {
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GISEL-NEXT: buffer_load_ubyte v0, off, s[16:19], 0
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v1, 4, v0
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s4, v0
|
||||
; GISEL-NEXT: s_lshr_b32 s4, s4, 4
|
||||
; GISEL-NEXT: v_mov_b32_e32 v1, s4
|
||||
; GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
%p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
|
||||
%ret = load <2 x i4>, ptr addrspace(7) %p
|
||||
@ -3279,9 +3496,13 @@ define <4 x i4> @load_v4i4(ptr addrspace(8) inreg %buf) {
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GISEL-NEXT: buffer_load_ushort v0, off, s[16:19], 0
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v1, 4, v0
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v2, 8, v0
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v3, 12, v0
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s4, v0
|
||||
; GISEL-NEXT: s_lshr_b32 s5, s4, 4
|
||||
; GISEL-NEXT: s_lshr_b32 s6, s4, 8
|
||||
; GISEL-NEXT: s_lshr_b32 s4, s4, 12
|
||||
; GISEL-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GISEL-NEXT: v_mov_b32_e32 v2, s6
|
||||
; GISEL-NEXT: v_mov_b32_e32 v3, s4
|
||||
; GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
%p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
|
||||
%ret = load <4 x i4>, ptr addrspace(7) %p
|
||||
@ -3347,13 +3568,21 @@ define <8 x i4> @load_v8i4(ptr addrspace(8) inreg %buf) {
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GISEL-NEXT: buffer_load_dword v0, off, s[16:19], 0
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v1, 4, v0
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v2, 8, v0
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v3, 12, v0
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v5, 20, v0
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v6, 24, v0
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v7, 28, v0
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s4, v0
|
||||
; GISEL-NEXT: s_lshr_b32 s5, s4, 4
|
||||
; GISEL-NEXT: s_lshr_b32 s6, s4, 8
|
||||
; GISEL-NEXT: s_lshr_b32 s7, s4, 12
|
||||
; GISEL-NEXT: s_lshr_b32 s8, s4, 16
|
||||
; GISEL-NEXT: s_lshr_b32 s9, s4, 20
|
||||
; GISEL-NEXT: s_lshr_b32 s10, s4, 24
|
||||
; GISEL-NEXT: s_lshr_b32 s4, s4, 28
|
||||
; GISEL-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GISEL-NEXT: v_mov_b32_e32 v2, s6
|
||||
; GISEL-NEXT: v_mov_b32_e32 v3, s7
|
||||
; GISEL-NEXT: v_mov_b32_e32 v4, s8
|
||||
; GISEL-NEXT: v_mov_b32_e32 v5, s9
|
||||
; GISEL-NEXT: v_mov_b32_e32 v6, s10
|
||||
; GISEL-NEXT: v_mov_b32_e32 v7, s4
|
||||
; GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
%p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
|
||||
%ret = load <8 x i4>, ptr addrspace(7) %p
|
||||
@ -3429,7 +3658,10 @@ define <2 x i6> @load_v2i6(ptr addrspace(8) inreg %buf) {
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GISEL-NEXT: buffer_load_ushort v0, off, s[16:19], 0
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GISEL-NEXT: v_lshrrev_b16_e32 v1, 6, v0
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s4, v0
|
||||
; GISEL-NEXT: s_and_b32 s4, 0xffff, s4
|
||||
; GISEL-NEXT: s_lshr_b32 s4, s4, 6
|
||||
; GISEL-NEXT: v_mov_b32_e32 v1, s4
|
||||
; GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
%p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
|
||||
%ret = load <2 x i6>, ptr addrspace(7) %p
|
||||
@ -3528,9 +3760,13 @@ define <4 x i8> @volatile_load_v4i8(ptr addrspace(8) inreg %buf) {
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GISEL-NEXT: buffer_load_dword v0, off, s[16:19], 0 glc
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v3, 24, v0
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s4, v0
|
||||
; GISEL-NEXT: s_lshr_b32 s5, s4, 8
|
||||
; GISEL-NEXT: s_lshr_b32 s6, s4, 16
|
||||
; GISEL-NEXT: s_lshr_b32 s4, s4, 24
|
||||
; GISEL-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GISEL-NEXT: v_mov_b32_e32 v2, s6
|
||||
; GISEL-NEXT: v_mov_b32_e32 v3, s4
|
||||
; GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
%p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
|
||||
%ret = load volatile <4 x i8>, ptr addrspace(7) %p
|
||||
@ -3593,10 +3829,16 @@ define <6 x i8> @volatile_load_v6i8(ptr addrspace(8) inreg %buf) {
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GISEL-NEXT: buffer_load_ushort v4, off, s[16:19], 0 offset:4 glc
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v3, 24, v0
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v5, 8, v4
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s4, v0
|
||||
; GISEL-NEXT: v_readfirstlane_b32 s5, v4
|
||||
; GISEL-NEXT: s_lshr_b32 s6, s4, 8
|
||||
; GISEL-NEXT: s_lshr_b32 s7, s4, 16
|
||||
; GISEL-NEXT: s_lshr_b32 s4, s4, 24
|
||||
; GISEL-NEXT: s_lshr_b32 s5, s5, 8
|
||||
; GISEL-NEXT: v_mov_b32_e32 v1, s6
|
||||
; GISEL-NEXT: v_mov_b32_e32 v2, s7
|
||||
; GISEL-NEXT: v_mov_b32_e32 v3, s4
|
||||
; GISEL-NEXT: v_mov_b32_e32 v5, s5
|
||||
; GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
%p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
|
||||
%ret = load volatile <6 x i8>, ptr addrspace(7) %p
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user