When we have a BITCAST and the source type is a vector with smaller elements than the destination type, we need to demand all the source elements that make up the demanded elts for the result when doing recursive calls to SimplifyDemandedBits, SimplifyDemandedVectorElts and SimplifyMultipleUseDemandedBits. The problem is that those simplifications are allowed to turn non-demanded elements of a vector into POISON, so unless we demand all source elements that make up the result, there is a risk that the result would be more poisonous (even for demanded elts) after the simplification. The patch fixes some bugs in SimplifyMultipleUseDemandedBits and SimplifyDemandedBits for situations where we did not consider the problem described above. Now we make sure that we also demand vector elements that "must not be turned into poison", even if those elements correspond to bits that do not need to be defined according to the DemandedBits mask. Fixes #138513
530 lines
40 KiB
LLVM
530 lines
40 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
|
|
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -stop-after=amdgpu-isel < %s | FileCheck %s
|
|
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck %s
|
|
; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -stop-after=amdgpu-isel < %s | FileCheck --check-prefix=CHECK45 %s
|
|
; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck --check-prefix=CHECK45 %s
|
|
|
|
; basic_raw_buffer: wraps the flat pointer %p in a buffer resource using only
; constant operands (i16 0, i64 1234, i32 5678) and returns the resource.
; Judging by the parameter names used by @general_case in this file, those
; operands are the stride, numVals and flags respectively.
; Expected gfx900 output: the pointer's high dword ($sgpr1) is ANDed with 65535
; and 1234 / 5678 are returned unchanged in $sgpr2 / $sgpr3.
; Expected gfx1250 output: the 64-bit pointer is ORed with a 64-bit immediate
; and the constants 9 and -536870912 are returned in $sgpr2 / $sgpr3.
define amdgpu_ps ptr addrspace(8) @basic_raw_buffer(ptr inreg %p) {
|
|
; CHECK-LABEL: name: basic_raw_buffer
|
|
; CHECK: bb.0 (%ir-block.0):
|
|
; CHECK-NEXT: liveins: $sgpr0, $sgpr1
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr1
|
|
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
|
|
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
|
|
; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], killed [[S_MOV_B32_]], implicit-def dead $scc
|
|
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_AND_B32_]]
|
|
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY2]], implicit $exec
|
|
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
|
|
; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
|
|
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1234
|
|
; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 killed [[S_MOV_B32_1]]
|
|
; CHECK-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 5678
|
|
; CHECK-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 killed [[S_MOV_B32_3]]
|
|
; CHECK-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_1]]
|
|
; CHECK-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_]]
|
|
; CHECK-NEXT: $sgpr2 = COPY [[S_MOV_B32_2]]
|
|
; CHECK-NEXT: $sgpr3 = COPY [[S_MOV_B32_4]]
|
|
; CHECK-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1, $sgpr2, $sgpr3
|
|
;
|
|
; CHECK45-LABEL: name: basic_raw_buffer
|
|
; CHECK45: bb.0 (%ir-block.0):
|
|
; CHECK45-NEXT: liveins: $sgpr0, $sgpr1
|
|
; CHECK45-NEXT: {{ $}}
|
|
; CHECK45-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr1
|
|
; CHECK45-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
|
|
; CHECK45-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
|
|
; CHECK45-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -6629298651489370112
|
|
; CHECK45-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[REG_SEQUENCE]], killed [[S_MOV_B]], implicit-def dead $scc
|
|
; CHECK45-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_OR_B64_]].sub1
|
|
; CHECK45-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
|
|
; CHECK45-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY3]]
|
|
; CHECK45-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY4]], implicit $exec
|
|
; CHECK45-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY2]]
|
|
; CHECK45-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY5]], implicit $exec
|
|
; CHECK45-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 9
|
|
; CHECK45-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 killed [[S_MOV_B32_]]
|
|
; CHECK45-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -536870912
|
|
; CHECK45-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 killed [[S_MOV_B32_2]]
|
|
; CHECK45-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
|
|
; CHECK45-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
|
|
; CHECK45-NEXT: $sgpr2 = COPY [[S_MOV_B32_1]]
|
|
; CHECK45-NEXT: $sgpr3 = COPY [[S_MOV_B32_3]]
|
|
; CHECK45-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1, $sgpr2, $sgpr3
|
|
%rsrc = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p0(ptr %p, i16 0, i64 1234, i32 5678)
|
|
ret ptr addrspace(8) %rsrc
|
|
}
|
|
|
|
; read_raw_buffer: builds a buffer resource from the addrspace(1) pointer %p
; with all remaining operands set to zero, then performs a raw float load
; through it. The i32 4 passed to the load shows up as the immediate 4 on the
; selected load instruction.
; Expected gfx900 output: the pointer's high dword is masked with 65535, dwords
; 2 and 3 of the descriptor are zero, and the load is BUFFER_LOAD_DWORD_OFFSET.
; Expected gfx1250 output: the pointer dwords are used unmasked and the load is
; BUFFER_LOAD_DWORD_VBUFFER_OFFSET with $sgpr_null.
define amdgpu_ps float @read_raw_buffer(ptr addrspace(1) inreg %p) {
|
|
; CHECK-LABEL: name: read_raw_buffer
|
|
; CHECK: bb.0 (%ir-block.0):
|
|
; CHECK-NEXT: liveins: $sgpr0, $sgpr1
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr1
|
|
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
|
|
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
|
|
; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], killed [[S_MOV_B32_]], implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
|
|
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, killed [[S_AND_B32_]], %subreg.sub1, [[S_MOV_B32_1]], %subreg.sub2, [[S_MOV_B32_1]], %subreg.sub3
|
|
; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET killed [[REG_SEQUENCE]], [[S_MOV_B32_1]], 4, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8)
|
|
; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
|
|
; CHECK-NEXT: SI_RETURN_TO_EPILOG $vgpr0
|
|
;
|
|
; CHECK45-LABEL: name: read_raw_buffer
|
|
; CHECK45: bb.0 (%ir-block.0):
|
|
; CHECK45-NEXT: liveins: $sgpr0, $sgpr1
|
|
; CHECK45-NEXT: {{ $}}
|
|
; CHECK45-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr1
|
|
; CHECK45-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
|
|
; CHECK45-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
|
|
; CHECK45-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
|
|
; CHECK45-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
|
|
; CHECK45-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
|
|
; CHECK45-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY3]], %subreg.sub0, killed [[COPY2]], %subreg.sub1, [[S_MOV_B32_]], %subreg.sub2, [[S_MOV_B32_]], %subreg.sub3
|
|
; CHECK45-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFSET killed [[REG_SEQUENCE1]], $sgpr_null, 4, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8)
|
|
; CHECK45-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_VBUFFER_OFFSET]]
|
|
; CHECK45-NEXT: SI_RETURN_TO_EPILOG $vgpr0
|
|
%rsrc = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p1(ptr addrspace(1) %p, i16 0, i64 0, i32 0)
|
|
%loaded = call float @llvm.amdgcn.raw.ptr.buffer.load(ptr addrspace(8) %rsrc, i32 4, i32 0, i32 0)
|
|
ret float %loaded
|
|
}
|
|
|
|
; basic_struct_buffer: same as @basic_raw_buffer, but passes i16 4 instead of
; i16 0 as the first integer operand of the intrinsic (the operand that
; @general_case in this file names %stride).
; Expected gfx900 output: the masked high pointer dword is additionally ORed
; with 262144 (that is, 4 << 16) before being returned in $sgpr1.
; Expected gfx1250 output: differs from @basic_raw_buffer only in the constant
; returned in $sgpr3 (-536854528 instead of -536870912).
define amdgpu_ps ptr addrspace(8) @basic_struct_buffer(ptr inreg %p) {
|
|
; CHECK-LABEL: name: basic_struct_buffer
|
|
; CHECK: bb.0 (%ir-block.0):
|
|
; CHECK-NEXT: liveins: $sgpr0, $sgpr1
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr1
|
|
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
|
|
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
|
|
; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], killed [[S_MOV_B32_]], implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 262144
|
|
; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 killed [[S_AND_B32_]], killed [[S_MOV_B32_1]], implicit-def dead $scc
|
|
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_OR_B32_]]
|
|
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY2]], implicit $exec
|
|
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
|
|
; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
|
|
; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 1234
|
|
; CHECK-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 killed [[S_MOV_B32_2]]
|
|
; CHECK-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 5678
|
|
; CHECK-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sreg_32 = S_MOV_B32 killed [[S_MOV_B32_4]]
|
|
; CHECK-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_1]]
|
|
; CHECK-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_]]
|
|
; CHECK-NEXT: $sgpr2 = COPY [[S_MOV_B32_3]]
|
|
; CHECK-NEXT: $sgpr3 = COPY [[S_MOV_B32_5]]
|
|
; CHECK-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1, $sgpr2, $sgpr3
|
|
;
|
|
; CHECK45-LABEL: name: basic_struct_buffer
|
|
; CHECK45: bb.0 (%ir-block.0):
|
|
; CHECK45-NEXT: liveins: $sgpr0, $sgpr1
|
|
; CHECK45-NEXT: {{ $}}
|
|
; CHECK45-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr1
|
|
; CHECK45-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
|
|
; CHECK45-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
|
|
; CHECK45-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -6629298651489370112
|
|
; CHECK45-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[REG_SEQUENCE]], killed [[S_MOV_B]], implicit-def dead $scc
|
|
; CHECK45-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_OR_B64_]].sub1
|
|
; CHECK45-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
|
|
; CHECK45-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY3]]
|
|
; CHECK45-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY4]], implicit $exec
|
|
; CHECK45-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY2]]
|
|
; CHECK45-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY5]], implicit $exec
|
|
; CHECK45-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 9
|
|
; CHECK45-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 killed [[S_MOV_B32_]]
|
|
; CHECK45-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -536854528
|
|
; CHECK45-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 killed [[S_MOV_B32_2]]
|
|
; CHECK45-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
|
|
; CHECK45-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
|
|
; CHECK45-NEXT: $sgpr2 = COPY [[S_MOV_B32_1]]
|
|
; CHECK45-NEXT: $sgpr3 = COPY [[S_MOV_B32_3]]
|
|
; CHECK45-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1, $sgpr2, $sgpr3
|
|
%rsrc = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p0(ptr %p, i16 4, i64 1234, i32 5678)
|
|
ret ptr addrspace(8) %rsrc
|
|
}
|
|
|
|
; variable_top_half: the i16 operand of the intrinsic is the constant 4, while
; %numVals and %flags are inreg (SGPR) arguments.
; Expected gfx900 output: the high pointer dword is masked with 65535 and ORed
; with 262144; only one dword of %numVals is read (the liveins omit $sgpr3) and
; it is returned in $sgpr2, and the $sgpr4 input is returned in $sgpr3.
; Expected gfx1250 output: all five input SGPRs are live-in and the result is
; assembled with shifts (by 25, 28 and 7) and 64-bit ORs, including an OR with
; the immediate 70368744177664.
define amdgpu_ps ptr addrspace(8) @variable_top_half(ptr inreg %p, i64 inreg %numVals, i32 inreg %flags) {
|
|
; CHECK-LABEL: name: variable_top_half
|
|
; CHECK: bb.0 (%ir-block.0):
|
|
; CHECK-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr4
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
|
|
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr2
|
|
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr1
|
|
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr0
|
|
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
|
|
; CHECK-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
|
|
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, killed [[DEF]], %subreg.sub1
|
|
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
|
|
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
|
|
; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], killed [[S_MOV_B32_]], implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 262144
|
|
; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 killed [[S_AND_B32_]], killed [[S_MOV_B32_1]], implicit-def dead $scc
|
|
; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_OR_B32_]]
|
|
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY5]], implicit $exec
|
|
; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
|
|
; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY6]], implicit $exec
|
|
; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY3]]
|
|
; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
|
|
; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
|
|
; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
|
|
; CHECK-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_2]]
|
|
; CHECK-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_]]
|
|
; CHECK-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_1]]
|
|
; CHECK-NEXT: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]]
|
|
; CHECK-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1, $sgpr2, $sgpr3
|
|
;
|
|
; CHECK45-LABEL: name: variable_top_half
|
|
; CHECK45: bb.0 (%ir-block.0):
|
|
; CHECK45-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
|
|
; CHECK45-NEXT: {{ $}}
|
|
; CHECK45-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
|
|
; CHECK45-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
|
|
; CHECK45-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
|
|
; CHECK45-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
|
|
; CHECK45-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
|
|
; CHECK45-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
|
|
; CHECK45-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
|
|
; CHECK45-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
|
|
; CHECK45-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 25
|
|
; CHECK45-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 killed [[COPY5]], killed [[S_MOV_B32_]], implicit-def dead $scc
|
|
; CHECK45-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
|
|
; CHECK45-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, killed [[S_LSHL_B32_]], %subreg.sub1
|
|
; CHECK45-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[REG_SEQUENCE]], killed [[REG_SEQUENCE2]], implicit-def dead $scc
|
|
; CHECK45-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_OR_B64_]].sub1
|
|
; CHECK45-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 28
|
|
; CHECK45-NEXT: [[S_LSHL_B32_1:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], killed [[S_MOV_B32_2]], implicit-def dead $scc
|
|
; CHECK45-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, killed [[S_LSHL_B32_1]], %subreg.sub1
|
|
; CHECK45-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 7
|
|
; CHECK45-NEXT: [[S_LSHR_B64_:%[0-9]+]]:sreg_64 = S_LSHR_B64 [[REG_SEQUENCE1]], killed [[S_MOV_B32_3]], implicit-def dead $scc
|
|
; CHECK45-NEXT: [[S_OR_B64_1:%[0-9]+]]:sreg_64 = S_OR_B64 [[S_LSHR_B64_]], killed [[REG_SEQUENCE3]], implicit-def dead $scc
|
|
; CHECK45-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 70368744177664
|
|
; CHECK45-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY killed [[S_MOV_B]]
|
|
; CHECK45-NEXT: [[S_OR_B64_2:%[0-9]+]]:sreg_64 = S_OR_B64 killed [[S_OR_B64_1]], killed [[COPY7]], implicit-def dead $scc
|
|
; CHECK45-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_OR_B64_2]].sub1
|
|
; CHECK45-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
|
|
; CHECK45-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY9]], implicit $exec
|
|
; CHECK45-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
|
|
; CHECK45-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY10]], implicit $exec
|
|
; CHECK45-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
|
|
; CHECK45-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY11]]
|
|
; CHECK45-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY12]], implicit $exec
|
|
; CHECK45-NEXT: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[S_LSHR_B64_]].sub0
|
|
; CHECK45-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY13]]
|
|
; CHECK45-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY14]], implicit $exec
|
|
; CHECK45-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_2]]
|
|
; CHECK45-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
|
|
; CHECK45-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_3]]
|
|
; CHECK45-NEXT: $sgpr3 = COPY [[V_READFIRSTLANE_B32_]]
|
|
; CHECK45-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1, $sgpr2, $sgpr3
|
|
%rsrc = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p0(ptr %p, i16 4, i64 %numVals, i32 %flags)
|
|
ret ptr addrspace(8) %rsrc
|
|
}
|
|
|
|
; general_case: every integer operand of the intrinsic (%stride, %numVals and
; %flags) is an inreg (SGPR) argument, and the resulting resource is returned.
; Expected gfx900 output: the $sgpr2 input is shifted left by 16 and ORed into
; the high pointer dword after that dword is masked with 65535; the $sgpr3 and
; $sgpr5 inputs are returned in $sgpr2 and $sgpr3; $sgpr4 is not a live-in.
; Expected gfx1250 output: $sgpr4 is also live-in and the result is assembled
; with shifts (by 7, 12, 28 and 25) and 64-bit ORs.
define amdgpu_ps ptr addrspace(8) @general_case(ptr inreg %p, i16 inreg %stride, i64 inreg %numVals, i32 inreg %flags) {
|
|
; CHECK-LABEL: name: general_case
|
|
; CHECK: bb.0 (%ir-block.0):
|
|
; CHECK-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr5
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr5
|
|
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
|
|
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
|
|
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
|
|
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
|
|
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
|
|
; CHECK-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
|
|
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, killed [[DEF]], %subreg.sub1
|
|
; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
|
|
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
|
|
; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY3]], killed [[S_MOV_B32_]], implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY2]], 16, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 killed [[S_AND_B32_]], killed [[S_LSHL_B32_]], implicit-def dead $scc
|
|
; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_OR_B32_]]
|
|
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY6]], implicit $exec
|
|
; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
|
|
; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY7]], implicit $exec
|
|
; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
|
|
; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
|
|
; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
|
|
; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
|
|
; CHECK-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_2]]
|
|
; CHECK-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_]]
|
|
; CHECK-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_1]]
|
|
; CHECK-NEXT: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]]
|
|
; CHECK-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1, $sgpr2, $sgpr3
|
|
;
|
|
; CHECK45-LABEL: name: general_case
|
|
; CHECK45: bb.0 (%ir-block.0):
|
|
; CHECK45-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5
|
|
; CHECK45-NEXT: {{ $}}
|
|
; CHECK45-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr5
|
|
; CHECK45-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr4
|
|
; CHECK45-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
|
|
; CHECK45-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
|
|
; CHECK45-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
|
|
; CHECK45-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
|
|
; CHECK45-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
|
|
; CHECK45-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
|
|
; CHECK45-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 7
|
|
; CHECK45-NEXT: [[S_LSHR_B64_:%[0-9]+]]:sreg_64 = S_LSHR_B64 [[REG_SEQUENCE1]], killed [[S_MOV_B32_]], implicit-def dead $scc
|
|
; CHECK45-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[COPY3]]
|
|
; CHECK45-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 12
|
|
; CHECK45-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 killed [[COPY6]], killed [[S_MOV_B32_1]], implicit-def dead $scc
|
|
; CHECK45-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 0
|
|
; CHECK45-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_2]], %subreg.sub0, killed [[S_LSHL_B32_]], %subreg.sub1
|
|
; CHECK45-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[S_LSHR_B64_]], killed [[REG_SEQUENCE2]], implicit-def dead $scc
|
|
; CHECK45-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 28
|
|
; CHECK45-NEXT: [[S_LSHL_B32_1:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], killed [[S_MOV_B32_3]], implicit-def dead $scc
|
|
; CHECK45-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_2]], %subreg.sub0, killed [[S_LSHL_B32_1]], %subreg.sub1
|
|
; CHECK45-NEXT: [[S_OR_B64_1:%[0-9]+]]:sreg_64 = S_OR_B64 killed [[S_OR_B64_]], killed [[REG_SEQUENCE3]], implicit-def dead $scc
|
|
; CHECK45-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_OR_B64_1]].sub1
|
|
; CHECK45-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
|
|
; CHECK45-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 25
|
|
; CHECK45-NEXT: [[S_LSHL_B32_2:%[0-9]+]]:sreg_32 = S_LSHL_B32 killed [[COPY8]], killed [[S_MOV_B32_4]], implicit-def dead $scc
|
|
; CHECK45-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_2]], %subreg.sub0, killed [[S_LSHL_B32_2]], %subreg.sub1
|
|
; CHECK45-NEXT: [[S_OR_B64_2:%[0-9]+]]:sreg_64 = S_OR_B64 [[REG_SEQUENCE]], killed [[REG_SEQUENCE4]], implicit-def dead $scc
|
|
; CHECK45-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_OR_B64_2]].sub1
|
|
; CHECK45-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
|
|
; CHECK45-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY10]], implicit $exec
|
|
; CHECK45-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
|
|
; CHECK45-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY11]], implicit $exec
|
|
; CHECK45-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
|
|
; CHECK45-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY12]]
|
|
; CHECK45-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY13]], implicit $exec
|
|
; CHECK45-NEXT: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[S_LSHR_B64_]].sub0
|
|
; CHECK45-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY14]]
|
|
; CHECK45-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY15]], implicit $exec
|
|
; CHECK45-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_2]]
|
|
; CHECK45-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_]]
|
|
; CHECK45-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_3]]
|
|
; CHECK45-NEXT: $sgpr3 = COPY [[V_READFIRSTLANE_B32_1]]
|
|
; CHECK45-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1, $sgpr2, $sgpr3
|
|
%rsrc = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p0(ptr %p, i16 %stride, i64 %numVals, i32 %flags)
|
|
ret ptr addrspace(8) %rsrc
|
|
}
|
|
|
|
; general_case_load: builds the resource from the same inreg (SGPR) arguments
; as @general_case, but instead of returning it feeds it to a struct buffer
; load whose i32 operands are all zero, and returns the loaded float.
; Expected gfx900 output: the descriptor is assembled directly into an
; sgpr_128 REG_SEQUENCE (no V_READFIRSTLANE_B32 is expected) and consumed by
; BUFFER_LOAD_DWORD_IDXEN.
; Expected gfx1250 output: the descriptor is assembled from two 64-bit OR
; results and consumed by BUFFER_LOAD_DWORD_VBUFFER_IDXEN with $sgpr_null.
define amdgpu_ps float @general_case_load(ptr inreg %p, i16 inreg %stride, i64 inreg %numVals, i32 inreg %flags) {
|
|
; CHECK-LABEL: name: general_case_load
|
|
; CHECK: bb.0 (%ir-block.0):
|
|
; CHECK-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr5
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr5
|
|
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
|
|
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
|
|
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
|
|
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
|
|
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
|
|
; CHECK-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
|
|
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, killed [[DEF]], %subreg.sub1
|
|
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
|
|
; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY3]], killed [[S_MOV_B32_]], implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY2]], 16, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 killed [[S_AND_B32_]], killed [[S_LSHL_B32_]], implicit-def dead $scc
|
|
; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
|
|
; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, killed [[S_OR_B32_]], %subreg.sub1, killed [[COPY5]], %subreg.sub2, [[COPY]], %subreg.sub3
|
|
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
|
|
; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
|
|
; CHECK-NEXT: [[BUFFER_LOAD_DWORD_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN [[COPY6]], killed [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8)
|
|
; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_IDXEN]]
|
|
; CHECK-NEXT: SI_RETURN_TO_EPILOG $vgpr0
|
|
;
|
|
; CHECK45-LABEL: name: general_case_load
|
|
; CHECK45: bb.0 (%ir-block.0):
|
|
; CHECK45-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5
|
|
; CHECK45-NEXT: {{ $}}
|
|
; CHECK45-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr5
|
|
; CHECK45-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr4
|
|
; CHECK45-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
|
|
; CHECK45-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
|
|
; CHECK45-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
|
|
; CHECK45-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
|
|
; CHECK45-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
|
|
; CHECK45-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
|
|
; CHECK45-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 7
|
|
; CHECK45-NEXT: [[S_LSHR_B64_:%[0-9]+]]:sreg_64 = S_LSHR_B64 [[REG_SEQUENCE1]], killed [[S_MOV_B32_]], implicit-def dead $scc
|
|
; CHECK45-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[COPY3]]
|
|
; CHECK45-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 12
|
|
; CHECK45-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 killed [[COPY6]], killed [[S_MOV_B32_1]], implicit-def dead $scc
|
|
; CHECK45-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 0
|
|
; CHECK45-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_2]], %subreg.sub0, killed [[S_LSHL_B32_]], %subreg.sub1
|
|
; CHECK45-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 killed [[S_LSHR_B64_]], killed [[REG_SEQUENCE2]], implicit-def dead $scc
|
|
; CHECK45-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 28
|
|
; CHECK45-NEXT: [[S_LSHL_B32_1:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], killed [[S_MOV_B32_3]], implicit-def dead $scc
|
|
; CHECK45-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_2]], %subreg.sub0, killed [[S_LSHL_B32_1]], %subreg.sub1
|
|
; CHECK45-NEXT: [[S_OR_B64_1:%[0-9]+]]:sreg_64 = S_OR_B64 killed [[S_OR_B64_]], killed [[REG_SEQUENCE3]], implicit-def dead $scc
|
|
; CHECK45-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_OR_B64_1]].sub1
|
|
; CHECK45-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_OR_B64_1]].sub0
|
|
; CHECK45-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
|
|
; CHECK45-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 25
|
|
; CHECK45-NEXT: [[S_LSHL_B32_2:%[0-9]+]]:sreg_32 = S_LSHL_B32 killed [[COPY9]], killed [[S_MOV_B32_4]], implicit-def dead $scc
|
|
; CHECK45-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_2]], %subreg.sub0, killed [[S_LSHL_B32_2]], %subreg.sub1
|
|
; CHECK45-NEXT: [[S_OR_B64_2:%[0-9]+]]:sreg_64 = S_OR_B64 killed [[REG_SEQUENCE]], killed [[REG_SEQUENCE4]], implicit-def dead $scc
|
|
; CHECK45-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_OR_B64_2]].sub1
|
|
; CHECK45-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_OR_B64_2]].sub0
|
|
; CHECK45-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
|
|
; CHECK45-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]]
|
|
; CHECK45-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_IDXEN [[COPY12]], killed [[REG_SEQUENCE5]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8)
|
|
; CHECK45-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_VBUFFER_IDXEN]]
|
|
; CHECK45-NEXT: SI_RETURN_TO_EPILOG $vgpr0
|
|
%rsrc = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p0(ptr %p, i16 %stride, i64 %numVals, i32 %flags)
|
|
%value = call float @llvm.amdgcn.struct.ptr.buffer.load(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
|
|
ret float %value
|
|
}
|
|
|
|
; None of the components are uniform due to the lack of an inreg
|
|
define amdgpu_ps float @general_case_load_with_waterfall(ptr %p, i16 %stride, i64 %numVals, i32 %flags) {
|
|
; CHECK-LABEL: name: general_case_load_with_waterfall
|
|
; CHECK: bb.0 (%ir-block.0):
|
|
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr5
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5
|
|
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3
|
|
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
|
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
|
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
|
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
|
|
; CHECK-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
|
|
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, killed [[DEF]], %subreg.sub1
|
|
; CHECK-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 16, [[COPY2]], implicit $exec
|
|
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
|
|
; CHECK-NEXT: [[V_AND_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_OR_B32_e64 [[COPY3]], killed [[S_MOV_B32_]], killed [[V_LSHLREV_B32_e64_]], implicit $exec
|
|
; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
|
|
; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, killed [[V_AND_OR_B32_e64_]], %subreg.sub1, killed [[COPY5]], %subreg.sub2, [[COPY]], %subreg.sub3
|
|
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
|
|
; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
|
|
; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_128 = COPY [[REG_SEQUENCE1]]
|
|
; CHECK-NEXT: [[BUFFER_LOAD_DWORD_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN [[COPY6]], killed [[COPY7]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8)
|
|
; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_IDXEN]]
|
|
; CHECK-NEXT: SI_RETURN_TO_EPILOG $vgpr0
|
|
;
|
|
; CHECK45-LABEL: name: general_case_load_with_waterfall
|
|
; CHECK45: bb.0 (%ir-block.0):
|
|
; CHECK45-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
|
|
; CHECK45-NEXT: {{ $}}
|
|
; CHECK45-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5
|
|
; CHECK45-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4
|
|
; CHECK45-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
|
|
; CHECK45-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
|
; CHECK45-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
|
; CHECK45-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
|
; CHECK45-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
|
|
; CHECK45-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
|
|
; CHECK45-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 28
|
|
; CHECK45-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 killed [[S_MOV_B32_]], [[COPY]], implicit $exec
|
|
; CHECK45-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
|
; CHECK45-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, killed [[V_LSHLREV_B32_e64_]], %subreg.sub1
|
|
; CHECK45-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
|
|
; CHECK45-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 7
|
|
; CHECK45-NEXT: [[COPY7:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
|
|
; CHECK45-NEXT: [[V_LSHRREV_B64_e64_:%[0-9]+]]:vreg_64_align2 = V_LSHRREV_B64_e64 killed [[S_MOV_B32_1]], [[COPY7]], implicit $exec
|
|
; CHECK45-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[V_LSHRREV_B64_e64_]].sub1
|
|
; CHECK45-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY3]]
|
|
; CHECK45-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 12
|
|
; CHECK45-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 killed [[S_MOV_B32_2]], killed [[COPY9]], implicit $exec
|
|
; CHECK45-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, killed [[V_LSHLREV_B32_e64_1]], %subreg.sub1
|
|
; CHECK45-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE3]].sub1
|
|
; CHECK45-NEXT: [[V_OR3_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR3_B32_e64 killed [[COPY8]], killed [[COPY10]], killed [[COPY6]], implicit $exec
|
|
; CHECK45-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
|
|
; CHECK45-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[V_LSHRREV_B64_e64_]].sub0
|
|
; CHECK45-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE3]].sub0
|
|
; CHECK45-NEXT: [[V_OR3_B32_e64_1:%[0-9]+]]:vgpr_32 = V_OR3_B32_e64 killed [[COPY12]], killed [[COPY13]], killed [[COPY11]], implicit $exec
|
|
; CHECK45-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[V_OR3_B32_e64_1]], %subreg.sub0, killed [[V_OR3_B32_e64_]], %subreg.sub1
|
|
; CHECK45-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
|
|
; CHECK45-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
|
|
; CHECK45-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
|
|
; CHECK45-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
|
|
; CHECK45-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 25
|
|
; CHECK45-NEXT: [[V_LSHLREV_B32_e64_2:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 killed [[S_MOV_B32_3]], killed [[COPY17]], implicit $exec
|
|
; CHECK45-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, killed [[V_LSHLREV_B32_e64_2]], %subreg.sub1
|
|
; CHECK45-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE5]].sub1
|
|
; CHECK45-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 killed [[COPY16]], killed [[COPY18]], implicit $exec
|
|
; CHECK45-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
|
|
; CHECK45-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE5]].sub0
|
|
; CHECK45-NEXT: [[V_OR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 killed [[COPY19]], killed [[COPY20]], implicit $exec
|
|
; CHECK45-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[V_OR_B32_e64_1]], %subreg.sub0, killed [[V_OR_B32_e64_]], %subreg.sub1
|
|
; CHECK45-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE6]].sub1
|
|
; CHECK45-NEXT: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE6]].sub0
|
|
; CHECK45-NEXT: [[REG_SEQUENCE7:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE killed [[COPY22]], %subreg.sub0, killed [[COPY21]], %subreg.sub1, killed [[COPY15]], %subreg.sub2, killed [[COPY14]], %subreg.sub3
|
|
; CHECK45-NEXT: [[COPY23:%[0-9]+]]:sgpr_128 = COPY [[REG_SEQUENCE7]]
|
|
; CHECK45-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_IDXEN [[V_MOV_B32_e32_]], killed [[COPY23]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8)
|
|
; CHECK45-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_VBUFFER_IDXEN]]
|
|
; CHECK45-NEXT: SI_RETURN_TO_EPILOG $vgpr0
|
|
%rsrc = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p0(ptr %p, i16 %stride, i64 %numVals, i32 %flags)
|
|
%value = call float @llvm.amdgcn.struct.ptr.buffer.load(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
|
|
ret float %value
|
|
}
|
|
|
|
; Loads a float through a buffer fat pointer (addrspace 7) that
; llvm.amdgcn.make.buffer.rsrc.p7.p0 builds from the flat pointer %p, with
; zero passed for the i16, i64 and i32 operands.
;
; What the autogenerated assertions below pin down:
;  - gfx900 run (plain prefix): the value copied from $sgpr1 is ANDed with
;    65535 before it becomes sub1 of the 128-bit resource, sub2 and sub3 are
;    both the constant 0, and the load is BUFFER_LOAD_DWORD_OFFSET.
;  - gfx1250 run (45-suffixed prefix): $sgpr0 and $sgpr1 are forwarded into
;    sub0 and sub1 without masking, sub2 and sub3 are both the constant 0, and
;    the load is BUFFER_LOAD_DWORD_VBUFFER_OFFSET with $sgpr_null.
;
; Regenerate the assertions with utils/update_mir_test_checks.py instead of
; editing them by hand.
define amdgpu_ps float @read_buffer_fat_ptr_p0(ptr inreg %p) {
|
|
; CHECK-LABEL: name: read_buffer_fat_ptr_p0
|
|
; CHECK: bb.0 (%ir-block.0):
|
|
; CHECK-NEXT: liveins: $sgpr0, $sgpr1
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr1
|
|
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
|
|
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
|
|
; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], killed [[S_MOV_B32_]], implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
|
|
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, killed [[S_AND_B32_]], %subreg.sub1, [[S_MOV_B32_1]], %subreg.sub2, [[S_MOV_B32_1]], %subreg.sub3
|
|
; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET killed [[REG_SEQUENCE]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.ptr, align 1, addrspace 8)
|
|
; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
|
|
; CHECK-NEXT: SI_RETURN_TO_EPILOG $vgpr0
|
|
;
|
|
; CHECK45-LABEL: name: read_buffer_fat_ptr_p0
|
|
; CHECK45: bb.0 (%ir-block.0):
|
|
; CHECK45-NEXT: liveins: $sgpr0, $sgpr1
|
|
; CHECK45-NEXT: {{ $}}
|
|
; CHECK45-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr1
|
|
; CHECK45-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
|
|
; CHECK45-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
|
|
; CHECK45-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
|
|
; CHECK45-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
|
|
; CHECK45-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
|
|
; CHECK45-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY3]], %subreg.sub0, killed [[COPY2]], %subreg.sub1, [[S_MOV_B32_]], %subreg.sub2, [[S_MOV_B32_]], %subreg.sub3
|
|
; CHECK45-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFSET killed [[REG_SEQUENCE1]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.ptr, align 1, addrspace 8)
|
|
; CHECK45-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_VBUFFER_OFFSET]]
|
|
; CHECK45-NEXT: SI_RETURN_TO_EPILOG $vgpr0
|
|
%ptr = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p0(ptr %p, i16 0, i64 0, i32 0)
|
|
%loaded = load float, ptr addrspace(7) %ptr
|
|
ret float %loaded
|
|
}
|
|
|
|
; Loads a float through a buffer fat pointer (addrspace 7) that
; llvm.amdgcn.make.buffer.rsrc.p7.p1 builds from the addrspace(1) pointer %p,
; with zero passed for the i16, i64 and i32 operands.
;
; What the autogenerated assertions below pin down:
;  - gfx900 run (plain prefix): the value copied from $sgpr1 is ANDed with
;    65535 before it becomes sub1 of the 128-bit resource, sub2 and sub3 are
;    both the constant 0, and the load is BUFFER_LOAD_DWORD_OFFSET.
;  - gfx1250 run (45-suffixed prefix): $sgpr0 and $sgpr1 are forwarded into
;    sub0 and sub1 without masking, sub2 and sub3 are both the constant 0, and
;    the load is BUFFER_LOAD_DWORD_VBUFFER_OFFSET with $sgpr_null.
;
; Regenerate the assertions with utils/update_mir_test_checks.py instead of
; editing them by hand.
define amdgpu_ps float @read_buffer_fat_ptr_p1(ptr addrspace(1) inreg %p) {
|
|
; CHECK-LABEL: name: read_buffer_fat_ptr_p1
|
|
; CHECK: bb.0 (%ir-block.0):
|
|
; CHECK-NEXT: liveins: $sgpr0, $sgpr1
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr1
|
|
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
|
|
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
|
|
; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], killed [[S_MOV_B32_]], implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
|
|
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, killed [[S_AND_B32_]], %subreg.sub1, [[S_MOV_B32_1]], %subreg.sub2, [[S_MOV_B32_1]], %subreg.sub3
|
|
; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET killed [[REG_SEQUENCE]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.ptr, align 1, addrspace 8)
|
|
; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
|
|
; CHECK-NEXT: SI_RETURN_TO_EPILOG $vgpr0
|
|
;
|
|
; CHECK45-LABEL: name: read_buffer_fat_ptr_p1
|
|
; CHECK45: bb.0 (%ir-block.0):
|
|
; CHECK45-NEXT: liveins: $sgpr0, $sgpr1
|
|
; CHECK45-NEXT: {{ $}}
|
|
; CHECK45-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr1
|
|
; CHECK45-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
|
|
; CHECK45-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
|
|
; CHECK45-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
|
|
; CHECK45-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
|
|
; CHECK45-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
|
|
; CHECK45-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY3]], %subreg.sub0, killed [[COPY2]], %subreg.sub1, [[S_MOV_B32_]], %subreg.sub2, [[S_MOV_B32_]], %subreg.sub3
|
|
; CHECK45-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_OFFSET killed [[REG_SEQUENCE1]], $sgpr_null, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.ptr, align 1, addrspace 8)
|
|
; CHECK45-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_VBUFFER_OFFSET]]
|
|
; CHECK45-NEXT: SI_RETURN_TO_EPILOG $vgpr0
|
|
%ptr = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) %p, i16 0, i64 0, i32 0)
|
|
%loaded = load float, ptr addrspace(7) %ptr
|
|
ret float %loaded
|
|
}
|
|
|
|
; Declarations of the AMDGPU intrinsics called by the tests in this file.
; make.buffer.rsrc.p8.p0, make.buffer.rsrc.p7.p0, make.buffer.rsrc.p7.p1 and
; struct.ptr.buffer.load are called by the functions directly above.
; NOTE(review): make.buffer.rsrc.p8.p1 and raw.ptr.buffer.load are presumably
; used by tests earlier in the file - confirm before removing either.
declare ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p0(ptr nocapture readnone, i16, i64, i32)
|
|
declare ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p1(ptr addrspace(1) nocapture readnone, i16, i64, i32)
|
|
declare ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p0(ptr nocapture readnone, i16, i64, i32)
|
|
declare ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) nocapture readnone, i16, i64, i32)
|
|
declare float @llvm.amdgcn.raw.ptr.buffer.load(ptr addrspace(8) nocapture readonly, i32, i32, i32 immarg)
|
|
declare float @llvm.amdgcn.struct.ptr.buffer.load(ptr addrspace(8) nocapture readonly, i32, i32, i32, i32 immarg)
|