[AMDGPU][GlobalISel] Add RegBankLegalize rules for buffer store variants (#178488)
Add rules for G_AMDGPU_BUFFER_STORE, G_AMDGPU_BUFFER_STORE_FORMAT, G_AMDGPU_BUFFER_STORE_FORMAT_D16, and G_AMDGPU_TBUFFER_STORE_FORMAT.
This commit is contained in:
parent
ea251669ba
commit
cba7dcd107
@ -1079,6 +1079,8 @@ LLT RegBankLegalizeHelper::getTyFromID(RegBankLLTMappingApplyID ID) {
|
||||
case VgprV2S32:
|
||||
case UniInVgprV2S32:
|
||||
return LLT::fixed_vector(2, 32);
|
||||
case VgprV3S32:
|
||||
return LLT::fixed_vector(3, 32);
|
||||
case SgprV4S32:
|
||||
case SgprV4S32_WF:
|
||||
case VgprV4S32:
|
||||
@ -1215,8 +1217,9 @@ RegBankLegalizeHelper::getRegBankFromID(RegBankLLTMappingApplyID ID) {
|
||||
case VgprPtr128:
|
||||
case VgprV2S16:
|
||||
case VgprV2S32:
|
||||
case VgprV4S32:
|
||||
case VgprV2S64:
|
||||
case VgprV3S32:
|
||||
case VgprV4S32:
|
||||
case VgprB32:
|
||||
case VgprB64:
|
||||
case VgprB96:
|
||||
@ -1272,8 +1275,9 @@ bool RegBankLegalizeHelper::applyMappingDst(
|
||||
case VgprP5:
|
||||
case VgprV2S16:
|
||||
case VgprV2S32:
|
||||
case VgprV4S32:
|
||||
case VgprV2S64: {
|
||||
case VgprV2S64:
|
||||
case VgprV3S32:
|
||||
case VgprV4S32: {
|
||||
assert(Ty == getTyFromID(MethodIDs[OpIdx]));
|
||||
assert(RB == getRegBankFromID(MethodIDs[OpIdx]));
|
||||
break;
|
||||
@ -1449,8 +1453,9 @@ bool RegBankLegalizeHelper::applyMappingSrc(
|
||||
case VgprP5:
|
||||
case VgprV2S16:
|
||||
case VgprV2S32:
|
||||
case VgprV4S32:
|
||||
case VgprV2S64: {
|
||||
case VgprV2S64:
|
||||
case VgprV3S32:
|
||||
case VgprV4S32: {
|
||||
assert(Ty == getTyFromID(MethodIDs[i]));
|
||||
if (RB != VgprRB) {
|
||||
auto CopyToVgpr = B.buildCopy({VgprRB, Ty}, Reg);
|
||||
|
||||
@ -78,6 +78,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
|
||||
return isAnyPtr(MRI.getType(Reg), 128);
|
||||
case V2S32:
|
||||
return MRI.getType(Reg) == LLT::fixed_vector(2, 32);
|
||||
case V3S32:
|
||||
return MRI.getType(Reg) == LLT::fixed_vector(3, 32);
|
||||
case V4S32:
|
||||
return MRI.getType(Reg) == LLT::fixed_vector(4, 32);
|
||||
case B32:
|
||||
@ -989,8 +991,18 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
|
||||
.Div(B32, {{VgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
|
||||
.Uni(B32, {{UniInVgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}});
|
||||
|
||||
addRulesForGOpcs({G_AMDGPU_BUFFER_STORE})
|
||||
.Any({{S32}, {{}, {Vgpr32, SgprV4S32, Vgpr32, Vgpr32, Sgpr32}}});
|
||||
addRulesForGOpcs({G_AMDGPU_BUFFER_STORE, G_AMDGPU_BUFFER_STORE_FORMAT,
|
||||
G_AMDGPU_BUFFER_STORE_FORMAT_D16,
|
||||
G_AMDGPU_TBUFFER_STORE_FORMAT})
|
||||
.Any({{B32}, {{}, {VgprB32, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
|
||||
.Any({{B64}, {{}, {VgprB64, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
|
||||
.Any({{B128}, {{}, {VgprB128, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
|
||||
.Any(
|
||||
{{V2S32}, {{}, {VgprV2S32, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
|
||||
.Any(
|
||||
{{V3S32}, {{}, {VgprV3S32, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
|
||||
.Any({{V4S32},
|
||||
{{}, {VgprV4S32, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}});
|
||||
|
||||
addRulesForGOpcs({G_PTR_ADD})
|
||||
.Any({{UniPtr32}, {{SgprPtr32}, {SgprPtr32, Sgpr32}}})
|
||||
|
||||
@ -180,6 +180,7 @@ enum RegBankLLTMappingApplyID {
|
||||
VgprPtr128,
|
||||
VgprV2S16,
|
||||
VgprV2S32,
|
||||
VgprV3S32,
|
||||
VgprB32,
|
||||
VgprB64,
|
||||
VgprB96,
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
|
||||
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -stop-after=instruction-select < %s | FileCheck --check-prefix=GFX9 %s
|
||||
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx90a -stop-after=instruction-select < %s | FileCheck --check-prefix=GFX9 %s
|
||||
|
||||
define ptr @buffer_load_p0(ptr addrspace(8) inreg %buf) {
|
||||
; GFX9-LABEL: name: buffer_load_p0
|
||||
@ -176,10 +176,14 @@ define <2 x ptr addrspace(1)> @buffer_load_v2p1(ptr addrspace(8) inreg %buf) {
|
||||
; GFX9-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFSET]].sub1
|
||||
; GFX9-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFSET]].sub2
|
||||
; GFX9-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFSET]].sub3
|
||||
; GFX9-NEXT: $vgpr0 = COPY [[COPY4]]
|
||||
; GFX9-NEXT: $vgpr1 = COPY [[COPY5]]
|
||||
; GFX9-NEXT: $vgpr2 = COPY [[COPY6]]
|
||||
; GFX9-NEXT: $vgpr3 = COPY [[COPY7]]
|
||||
; GFX9-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
|
||||
; GFX9-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
|
||||
; GFX9-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
|
||||
; GFX9-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
|
||||
; GFX9-NEXT: $vgpr0 = COPY [[V_READFIRSTLANE_B32_]]
|
||||
; GFX9-NEXT: $vgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
|
||||
; GFX9-NEXT: $vgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
|
||||
; GFX9-NEXT: $vgpr3 = COPY [[V_READFIRSTLANE_B32_3]]
|
||||
; GFX9-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
|
||||
%ret = call <2 x ptr addrspace(1)> @llvm.amdgcn.raw.ptr.buffer.load.v2p1(ptr addrspace(8) inreg %buf, i32 0, i32 0, i32 0)
|
||||
ret <2 x ptr addrspace(1)> %ret
|
||||
@ -224,9 +228,12 @@ define <3 x ptr addrspace(5)> @buffer_load_v3p5(ptr addrspace(8) inreg %buf) {
|
||||
; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFSET]].sub0
|
||||
; GFX9-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFSET]].sub1
|
||||
; GFX9-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFSET]].sub2
|
||||
; GFX9-NEXT: $vgpr0 = COPY [[COPY4]]
|
||||
; GFX9-NEXT: $vgpr1 = COPY [[COPY5]]
|
||||
; GFX9-NEXT: $vgpr2 = COPY [[COPY6]]
|
||||
; GFX9-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
|
||||
; GFX9-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
|
||||
; GFX9-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
|
||||
; GFX9-NEXT: $vgpr0 = COPY [[V_READFIRSTLANE_B32_]]
|
||||
; GFX9-NEXT: $vgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
|
||||
; GFX9-NEXT: $vgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
|
||||
; GFX9-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
|
||||
%ret = call <3 x ptr addrspace(5)> @llvm.amdgcn.raw.ptr.buffer.load.v3p5(ptr addrspace(8) inreg %buf, i32 0, i32 0, i32 0)
|
||||
ret <3 x ptr addrspace(5)> %ret
|
||||
@ -269,10 +276,14 @@ define <4 x ptr addrspace(5)> @buffer_load_v4p5(ptr addrspace(8) inreg %buf) {
|
||||
; GFX9-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFSET]].sub1
|
||||
; GFX9-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFSET]].sub2
|
||||
; GFX9-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFSET]].sub3
|
||||
; GFX9-NEXT: $vgpr0 = COPY [[COPY4]]
|
||||
; GFX9-NEXT: $vgpr1 = COPY [[COPY5]]
|
||||
; GFX9-NEXT: $vgpr2 = COPY [[COPY6]]
|
||||
; GFX9-NEXT: $vgpr3 = COPY [[COPY7]]
|
||||
; GFX9-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
|
||||
; GFX9-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
|
||||
; GFX9-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
|
||||
; GFX9-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
|
||||
; GFX9-NEXT: $vgpr0 = COPY [[V_READFIRSTLANE_B32_]]
|
||||
; GFX9-NEXT: $vgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
|
||||
; GFX9-NEXT: $vgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
|
||||
; GFX9-NEXT: $vgpr3 = COPY [[V_READFIRSTLANE_B32_3]]
|
||||
; GFX9-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
|
||||
%ret = call <4 x ptr addrspace(5)> @llvm.amdgcn.raw.ptr.buffer.load.v4p5(ptr addrspace(8) inreg %buf, i32 0, i32 0, i32 0)
|
||||
ret <4 x ptr addrspace(5)> %ret
|
||||
|
||||
@ -1,10 +1,15 @@
|
||||
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN %s
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
|
||||
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN %s
|
||||
|
||||
; GCN-LABEL: {{^}}test1:
|
||||
; GCN: buffer_store_dword
|
||||
; GCN: buffer_load_dword
|
||||
; GCN: buffer_store_dword
|
||||
define amdgpu_cs void @test1(<4 x i32> inreg %buf, i32 %off) {
|
||||
; GCN-LABEL: test1:
|
||||
; GCN: ; %bb.0: ; %.entry
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GCN-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:8
|
||||
; GCN-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GCN-NEXT: s_endpgm
|
||||
.entry:
|
||||
call void @llvm.amdgcn.raw.buffer.store.i32(i32 0, <4 x i32> %buf, i32 8, i32 0, i32 0)
|
||||
%val = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> %buf, i32 %off, i32 0, i32 0)
|
||||
@ -12,11 +17,15 @@ define amdgpu_cs void @test1(<4 x i32> inreg %buf, i32 %off) {
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test1_ptrs:
|
||||
; GCN: buffer_store_dword
|
||||
; GCN: buffer_load_dword
|
||||
; GCN: buffer_store_dword
|
||||
define amdgpu_cs void @test1_ptrs(ptr addrspace(8) inreg %buf, i32 %off) {
|
||||
; GCN-LABEL: test1_ptrs:
|
||||
; GCN: ; %bb.0: ; %.entry
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GCN-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:8
|
||||
; GCN-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GCN-NEXT: s_endpgm
|
||||
.entry:
|
||||
call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 0, ptr addrspace(8) %buf, i32 8, i32 0, i32 0)
|
||||
%val = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) %buf, i32 %off, i32 0, i32 0)
|
||||
@ -26,11 +35,16 @@ define amdgpu_cs void @test1_ptrs(ptr addrspace(8) inreg %buf, i32 %off) {
|
||||
|
||||
;; In the future, the stores should be reorderable because they'd be known to be
|
||||
;; at distinct offsets.
|
||||
; GCN-LABEL: {{^}}test1_ptrs_reorderable:
|
||||
; GCN: buffer_store_dword
|
||||
; GCN: buffer_load_dword
|
||||
; GCN: buffer_store_dword
|
||||
define amdgpu_cs void @test1_ptrs_reorderable(ptr addrspace(8) inreg %buf, i32 %off) {
|
||||
; GCN-LABEL: test1_ptrs_reorderable:
|
||||
; GCN: ; %bb.0: ; %.entry
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GCN-NEXT: v_lshlrev_b32_e32 v0, 4, v0
|
||||
; GCN-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:8
|
||||
; GCN-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GCN-NEXT: s_endpgm
|
||||
.entry:
|
||||
%shifted.off = shl i32 %off, 4
|
||||
call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 0, ptr addrspace(8) %buf, i32 8, i32 0, i32 0)
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -new-reg-bank-select < %s | FileCheck -check-prefix=GFX10 %s
|
||||
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
|
||||
|
||||
; Divergent phis that don't require lowering using lane mask merging
|
||||
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
|
||||
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
@flat = external global i32, align 4
|
||||
@global = external addrspace(1) global i32, align 4
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=instruction-select -o - %s | FileCheck -check-prefix=UNPACKED %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=instruction-select -o - %s | FileCheck -check-prefix=PACKED %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX12 %s
|
||||
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=instruction-select -o - %s | FileCheck -check-prefix=UNPACKED %s
|
||||
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=instruction-select -o - %s | FileCheck -check-prefix=PACKED %s
|
||||
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX12 %s
|
||||
|
||||
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f16(<4 x i32> inreg %rsrc, half %val, i32 %voffset, i32 inreg %soffset) {
|
||||
; UNPACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f16
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX8 %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX8 %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX12 %s
|
||||
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX8 %s
|
||||
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX8 %s
|
||||
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX12 %s
|
||||
|
||||
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f32(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
|
||||
; GFX8-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f32
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=instruction-select -o - %s | FileCheck -check-prefix=UNPACKED %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=instruction-select -o - %s | FileCheck -check-prefix=PACKED %s
|
||||
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=instruction-select -o - %s | FileCheck -check-prefix=UNPACKED %s
|
||||
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=instruction-select -o - %s | FileCheck -check-prefix=PACKED %s
|
||||
|
||||
define amdgpu_ps void @raw_ptr_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f16(ptr addrspace(8) inreg %rsrc, half %val, i32 %voffset, i32 inreg %soffset) {
|
||||
; UNPACKED-LABEL: name: raw_ptr_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f16
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=instruction-select -o - %s | FileCheck %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=instruction-select -o - %s | FileCheck %s
|
||||
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=instruction-select -o - %s | FileCheck %s
|
||||
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=instruction-select -o - %s | FileCheck %s
|
||||
|
||||
define amdgpu_ps void @raw_ptr_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f32(ptr addrspace(8) inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
|
||||
; CHECK-LABEL: name: raw_ptr_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f32
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=instruction-select -o - %s | FileCheck -check-prefix=UNPACKED %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=instruction-select -o - %s | FileCheck -check-prefix=PACKED %s
|
||||
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=instruction-select -o - %s | FileCheck -check-prefix=UNPACKED %s
|
||||
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=instruction-select -o - %s | FileCheck -check-prefix=PACKED %s
|
||||
|
||||
define amdgpu_ps void @raw_tbuffer_store_i8__sgpr_rsrc__vgpr_voffset__sgpr_soffset(i8 %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
|
||||
; UNPACKED-LABEL: name: raw_tbuffer_store_i8__sgpr_rsrc__vgpr_voffset__sgpr_soffset
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -stop-after=instruction-select -o - %s | FileCheck %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -stop-after=instruction-select -o - %s | FileCheck %s
|
||||
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -stop-after=instruction-select -o - %s | FileCheck %s
|
||||
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -stop-after=instruction-select -o - %s | FileCheck %s
|
||||
|
||||
; Natural mapping
|
||||
define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset(float %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=instruction-select -o - %s | FileCheck -check-prefix=UNPACKED %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=instruction-select -o - %s | FileCheck -check-prefix=PACKED %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX12 %s
|
||||
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=instruction-select -o - %s | FileCheck -check-prefix=UNPACKED %s
|
||||
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=instruction-select -o - %s | FileCheck -check-prefix=PACKED %s
|
||||
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX12 %s
|
||||
|
||||
define amdgpu_ps void @raw_tbuffer_store_i8__sgpr_rsrc__vgpr_voffset__sgpr_soffset(i8 %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
|
||||
; UNPACKED-LABEL: name: raw_tbuffer_store_i8__sgpr_rsrc__vgpr_voffset__sgpr_soffset
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX10_GFX11 %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX10_GFX11 %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX12 %s
|
||||
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX10_GFX11 %s
|
||||
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX10_GFX11 %s
|
||||
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX12 %s
|
||||
|
||||
; Natural mapping
|
||||
define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset(float %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=instruction-select -o - %s | FileCheck -check-prefix=UNPACKED %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=instruction-select -o - %s | FileCheck -check-prefix=PACKED %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX12 %s
|
||||
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=instruction-select -o - %s | FileCheck -check-prefix=UNPACKED %s
|
||||
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=instruction-select -o - %s | FileCheck -check-prefix=PACKED %s
|
||||
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX12 %s
|
||||
|
||||
define amdgpu_ps void @struct_buffer_store_format_f16__vgpr_val__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset(half %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
|
||||
; UNPACKED-LABEL: name: struct_buffer_store_format_f16__vgpr_val__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX8 %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX12 %s
|
||||
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX8 %s
|
||||
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX12 %s
|
||||
|
||||
define amdgpu_ps void @struct_buffer_store_format_f32__vgpr_val__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset(float %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
|
||||
; GFX8-LABEL: name: struct_buffer_store_format_f32__vgpr_val__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=instruction-select -o - %s | FileCheck -check-prefix=UNPACKED %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=instruction-select -o - %s | FileCheck -check-prefix=PACKED %s
|
||||
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=instruction-select -o - %s | FileCheck -check-prefix=UNPACKED %s
|
||||
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=instruction-select -o - %s | FileCheck -check-prefix=PACKED %s
|
||||
|
||||
define amdgpu_ps void @struct_ptr_buffer_store_format_f16__vgpr_val__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset(half %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
|
||||
; UNPACKED-LABEL: name: struct_ptr_buffer_store_format_f16__vgpr_val__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=instruction-select -o - %s | FileCheck %s
|
||||
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=instruction-select -o - %s | FileCheck %s
|
||||
|
||||
define amdgpu_ps void @struct_ptr_buffer_store_format_f32__vgpr_val__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset(float %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
|
||||
; CHECK-LABEL: name: struct_ptr_buffer_store_format_f32__vgpr_val__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -o - %s | FileCheck %s
|
||||
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -o - %s | FileCheck %s
|
||||
|
||||
define amdgpu_cs void @test1(i32 %arg1, <4 x i32> inreg %arg2, i32, ptr addrspace(6) inreg %arg3) {
|
||||
; CHECK-LABEL: test1:
|
||||
|
||||
@ -1,6 +1,5 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -stop-after=regbankselect -regbankselect-fast -o - %s | FileCheck %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -stop-after=regbankselect -regbankselect-greedy -o - %s | FileCheck %s
|
||||
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -stop-after=amdgpu-regbanklegalize -o - %s | FileCheck %s
|
||||
|
||||
; Natural mapping
|
||||
define amdgpu_ps void @struct_buffer_store__sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, float %val, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
|
||||
@ -69,20 +68,19 @@ define amdgpu_ps void @struct_buffer_store__vgpr_rsrc__vgpr_val__vgpr_vindex__vg
|
||||
; CHECK-NEXT: bb.2:
|
||||
; CHECK-NEXT: successors: %bb.3(0x80000000)
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %14, %bb.3
|
||||
; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
|
||||
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec
|
||||
; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec
|
||||
; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec
|
||||
; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec
|
||||
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
|
||||
; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV2]](s32)
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT3:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV3]](s32)
|
||||
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[INTRINSIC_CONVERGENT]](s32), [[INTRINSIC_CONVERGENT1]](s32), [[INTRINSIC_CONVERGENT2]](s32), [[INTRINSIC_CONVERGENT3]](s32)
|
||||
; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
|
||||
; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>)
|
||||
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
|
||||
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
|
||||
; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
|
||||
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT4:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
|
||||
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT4]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.3:
|
||||
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
|
||||
@ -117,23 +115,22 @@ define amdgpu_ps void @struct_buffer_store__sgpr_rsrc__vgpr_val__vgpr_vindex__vg
|
||||
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
|
||||
; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
|
||||
; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr3
|
||||
; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
|
||||
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
|
||||
; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.2:
|
||||
; CHECK-NEXT: successors: %bb.3(0x80000000)
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %14, %bb.3
|
||||
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY7]](s32), implicit $exec
|
||||
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](s32), [[COPY7]]
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1)
|
||||
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32)
|
||||
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[INTRINSIC_CONVERGENT]](s32), [[COPY7]]
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1)
|
||||
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT1]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.3:
|
||||
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: G_AMDGPU_BUFFER_STORE [[COPY4]](s32), [[BUILD_VECTOR]](<4 x s32>), [[COPY5]](s32), [[COPY6]], [[V_READFIRSTLANE_B32_]], 0, 0, -1 :: (dereferenceable store (s32), align 1, addrspace 8)
|
||||
; CHECK-NEXT: G_AMDGPU_BUFFER_STORE [[COPY4]](s32), [[BUILD_VECTOR]](<4 x s32>), [[COPY5]](s32), [[COPY6]], [[INTRINSIC_CONVERGENT]], 0, 0, -1 :: (dereferenceable store (s32), align 1, addrspace 8)
|
||||
; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
|
||||
; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
|
||||
; CHECK-NEXT: {{ $}}
|
||||
@ -163,35 +160,34 @@ define amdgpu_ps void @struct_buffer_store__vgpr_rsrc__vgpr_val__vgpr_vindex__vg
|
||||
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
|
||||
; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
|
||||
; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6
|
||||
; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr7
|
||||
; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7
|
||||
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
|
||||
; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.2:
|
||||
; CHECK-NEXT: successors: %bb.3(0x80000000)
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %14, %bb.3
|
||||
; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
|
||||
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec
|
||||
; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec
|
||||
; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec
|
||||
; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec
|
||||
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
|
||||
; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV2]](s32)
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT3:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV3]](s32)
|
||||
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[INTRINSIC_CONVERGENT]](s32), [[INTRINSIC_CONVERGENT1]](s32), [[INTRINSIC_CONVERGENT2]](s32), [[INTRINSIC_CONVERGENT3]](s32)
|
||||
; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
|
||||
; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>)
|
||||
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
|
||||
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
|
||||
; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
|
||||
; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY7]](s32), implicit $exec
|
||||
; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_4]](s32), [[COPY7]]
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT4:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32)
|
||||
; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[INTRINSIC_CONVERGENT4]](s32), [[COPY7]]
|
||||
; CHECK-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]]
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND1]](s1)
|
||||
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT5:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND1]](s1)
|
||||
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT5]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.3:
|
||||
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: G_AMDGPU_BUFFER_STORE [[COPY4]](s32), [[BUILD_VECTOR1]](<4 x s32>), [[COPY5]](s32), [[COPY6]], [[V_READFIRSTLANE_B32_4]], 0, 0, -1 :: (dereferenceable store (s32), align 1, addrspace 8)
|
||||
; CHECK-NEXT: G_AMDGPU_BUFFER_STORE [[COPY4]](s32), [[BUILD_VECTOR1]](<4 x s32>), [[COPY5]](s32), [[COPY6]], [[INTRINSIC_CONVERGENT4]], 0, 0, -1 :: (dereferenceable store (s32), align 1, addrspace 8)
|
||||
; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
|
||||
; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
|
||||
; CHECK-NEXT: {{ $}}
|
||||
|
||||
@ -1,6 +1,5 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -stop-after=regbankselect -regbankselect-fast -o - %s | FileCheck %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -stop-after=regbankselect -regbankselect-greedy -o - %s | FileCheck %s
|
||||
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -stop-after=amdgpu-regbanklegalize -o - %s | FileCheck %s
|
||||
|
||||
; Natural mapping
|
||||
define amdgpu_ps void @struct_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset(ptr addrspace(8) inreg %rsrc, float %val, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
|
||||
@ -69,20 +68,19 @@ define amdgpu_ps void @struct_ptr_buffer_store__vgpr_rsrc__vgpr_val__vgpr_vindex
|
||||
; CHECK-NEXT: bb.2:
|
||||
; CHECK-NEXT: successors: %bb.3(0x80000000)
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %19, %bb.3
|
||||
; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
|
||||
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec
|
||||
; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec
|
||||
; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec
|
||||
; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec
|
||||
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
|
||||
; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV2]](s32)
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT3:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV3]](s32)
|
||||
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[INTRINSIC_CONVERGENT]](s32), [[INTRINSIC_CONVERGENT1]](s32), [[INTRINSIC_CONVERGENT2]](s32), [[INTRINSIC_CONVERGENT3]](s32)
|
||||
; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
|
||||
; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>)
|
||||
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
|
||||
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
|
||||
; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
|
||||
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT4:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
|
||||
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT4]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.3:
|
||||
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
|
||||
@ -116,7 +114,7 @@ define amdgpu_ps void @struct_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_vindex
|
||||
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
|
||||
; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
|
||||
; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr3
|
||||
; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
|
||||
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
|
||||
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
|
||||
; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
|
||||
@ -124,16 +122,15 @@ define amdgpu_ps void @struct_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_vindex
|
||||
; CHECK-NEXT: bb.2:
|
||||
; CHECK-NEXT: successors: %bb.3(0x80000000)
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %19, %bb.3
|
||||
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY7]](s32), implicit $exec
|
||||
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](s32), [[COPY7]]
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1)
|
||||
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32)
|
||||
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[INTRINSIC_CONVERGENT]](s32), [[COPY7]]
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1)
|
||||
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT1]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.3:
|
||||
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: G_AMDGPU_BUFFER_STORE [[COPY4]](s32), [[BUILD_VECTOR]](<4 x s32>), [[COPY5]](s32), [[COPY6]], [[V_READFIRSTLANE_B32_]], 0, 0, -1 :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8)
|
||||
; CHECK-NEXT: G_AMDGPU_BUFFER_STORE [[COPY4]](s32), [[BUILD_VECTOR]](<4 x s32>), [[COPY5]](s32), [[COPY6]], [[INTRINSIC_CONVERGENT]], 0, 0, -1 :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8)
|
||||
; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
|
||||
; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
|
||||
; CHECK-NEXT: {{ $}}
|
||||
@ -162,7 +159,7 @@ define amdgpu_ps void @struct_ptr_buffer_store__vgpr_rsrc__vgpr_val__vgpr_vindex
|
||||
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
|
||||
; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
|
||||
; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6
|
||||
; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr7
|
||||
; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7
|
||||
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
|
||||
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
|
||||
; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
|
||||
@ -170,28 +167,27 @@ define amdgpu_ps void @struct_ptr_buffer_store__vgpr_rsrc__vgpr_val__vgpr_vindex
|
||||
; CHECK-NEXT: bb.2:
|
||||
; CHECK-NEXT: successors: %bb.3(0x80000000)
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %19, %bb.3
|
||||
; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
|
||||
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec
|
||||
; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec
|
||||
; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec
|
||||
; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec
|
||||
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
|
||||
; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV2]](s32)
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT3:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV3]](s32)
|
||||
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[INTRINSIC_CONVERGENT]](s32), [[INTRINSIC_CONVERGENT1]](s32), [[INTRINSIC_CONVERGENT2]](s32), [[INTRINSIC_CONVERGENT3]](s32)
|
||||
; CHECK-NEXT: [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
|
||||
; CHECK-NEXT: [[UV6:%[0-9]+]]:sgpr(s64), [[UV7:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>)
|
||||
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
|
||||
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
|
||||
; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
|
||||
; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY7]](s32), implicit $exec
|
||||
; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_4]](s32), [[COPY7]]
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT4:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32)
|
||||
; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[INTRINSIC_CONVERGENT4]](s32), [[COPY7]]
|
||||
; CHECK-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]]
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND1]](s1)
|
||||
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT5:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND1]](s1)
|
||||
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT5]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.3:
|
||||
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: G_AMDGPU_BUFFER_STORE [[COPY4]](s32), [[BUILD_VECTOR1]](<4 x s32>), [[COPY5]](s32), [[COPY6]], [[V_READFIRSTLANE_B32_4]], 0, 0, -1 :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8)
|
||||
; CHECK-NEXT: G_AMDGPU_BUFFER_STORE [[COPY4]](s32), [[BUILD_VECTOR1]](<4 x s32>), [[COPY5]](s32), [[COPY6]], [[INTRINSIC_CONVERGENT4]], 0, 0, -1 :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8)
|
||||
; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
|
||||
; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
|
||||
; CHECK-NEXT: {{ $}}
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
|
||||
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck --check-prefix=GISEL %s
|
||||
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck --check-prefix=GISEL %s
|
||||
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck --check-prefix=SDAG %s
|
||||
|
||||
define amdgpu_kernel void @buffer_ptr_vector_ops(ptr addrspace(1) %somewhere) {
|
||||
|
||||
@ -4,7 +4,7 @@
|
||||
;RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1010 | FileCheck -check-prefix=GFX10 %s
|
||||
;RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 | FileCheck -check-prefix=GFX11 %s
|
||||
;RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 | FileCheck -check-prefix=GFX12 %s
|
||||
;RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx1200 | FileCheck -check-prefix=GFX12 %s
|
||||
;RUN: llc < %s -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 | FileCheck -check-prefix=GFX12 %s
|
||||
|
||||
define amdgpu_ps void @tbuffer_store(<4 x i32> inreg, <4 x float>, <4 x float>, <4 x float>) {
|
||||
; PREGFX10-LABEL: tbuffer_store:
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
|
||||
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx900 | FileCheck %s --check-prefixes=SDAG
|
||||
; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx900 | FileCheck %s --check-prefixes=GISEL
|
||||
; RUN: llc < %s -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx900 | FileCheck %s --check-prefixes=GISEL
|
||||
|
||||
define amdgpu_kernel void @buffers_dont_alias(ptr addrspace(8) noalias %a, ptr addrspace(8) noalias %b) {
|
||||
; SDAG-LABEL: buffers_dont_alias:
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
||||
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefix=SDAG %s
|
||||
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefix=GISEL %s
|
||||
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefix=GISEL %s
|
||||
|
||||
; Check that in strict OOB mode for buffers (relaxed-buffer-oob-mode attribute not set) the underaligned loads and stores get split.
|
||||
; FIXME: The loads/stores do not get split (extend amdgpu-lower-buffer-fat-pointers?).
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user