
This is in preparation for a patch that disables folding offsets into FLAT instructions if the corresponding address computation is not inbounds, to avoid miscompilations where this would lead to wrong aperture check results. With the added inbounds flags for GEPs and G_PTR_ADDs affecting FLAT instructions, the outputs for these tests won't change. For SWDEV-516125.
101 lines
3.1 KiB
LLVM
101 lines
3.1 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck --check-prefix=GCN %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck --check-prefix=GCN %s

; Both the SelectionDAG (-global-isel=0) and GlobalISel (-global-isel=1) runs
; share the GCN check prefix, i.e. both selectors must produce identical code.
; Second intrinsic operand is the cache-policy immediate (th/scope bits).
declare void @llvm.amdgcn.flat.prefetch(ptr %ptr, i32 %col)

; Base case: VGPR pointer, policy 0 -> plain flat_prefetch_b8 on the VGPR pair.
define amdgpu_ps void @flat_prefetch(ptr %ptr) {
; GCN-LABEL: flat_prefetch:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: flat_prefetch_b8 v[0:1]
; GCN-NEXT: s_endpgm
entry:
  tail call void @llvm.amdgcn.flat.prefetch(ptr %ptr, i32 0)
  ret void
}

; SGPR (inreg) pointer: selected as saddr form with a zero VGPR offset (v0).
define amdgpu_ps void @flat_prefetch_sgpr(ptr inreg %ptr) {
; GCN-LABEL: flat_prefetch_sgpr:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: flat_prefetch_b8 v0, s[0:1]
; GCN-NEXT: s_endpgm
entry:
  tail call void @llvm.amdgcn.flat.prefetch(ptr %ptr, i32 0)
  ret void
}

; Constant inbounds GEP (128 x i32 = 512 bytes) folds into the immediate
; offset field of the FLAT instruction.
define amdgpu_ps void @flat_prefetch_offset(ptr %ptr) {
; GCN-LABEL: flat_prefetch_offset:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: flat_prefetch_b8 v[0:1] offset:512
; GCN-NEXT: s_endpgm
entry:
  %gep = getelementptr inbounds i32, ptr %ptr, i32 128
  tail call void @llvm.amdgcn.flat.prefetch(ptr %gep, i32 0)
  ret void
}

; SGPR base plus variable VGPR byte offset: selected as saddr + voffset (v0).
define amdgpu_ps void @flat_prefetch_sgpr_voffset(ptr inreg %ptr, i32 %offset) {
; GCN-LABEL: flat_prefetch_sgpr_voffset:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: flat_prefetch_b8 v0, s[0:1]
; GCN-NEXT: s_endpgm
entry:
  %gep = getelementptr i8, ptr %ptr, i32 %offset
  tail call void @llvm.amdgcn.flat.prefetch(ptr %gep, i32 0)
  ret void
}

; SGPR base + variable VGPR offset + constant 128: the constant part folds
; into the immediate offset field on top of the saddr+voffset form.
; NOTE(review): these GEPs are not marked inbounds; per the header note, the
; upcoming inbounds-only folding change is expected to keep this output.
define amdgpu_ps void @flat_prefetch_sgpr_voffset_offset(ptr inreg %ptr, i32 %offset) {
; GCN-LABEL: flat_prefetch_sgpr_voffset_offset:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: flat_prefetch_b8 v0, s[0:1] offset:128
; GCN-NEXT: s_endpgm
entry:
  %gep1 = getelementptr i8, ptr %ptr, i32 %offset
  %gep2 = getelementptr i8, ptr %gep1, i32 128
  tail call void @llvm.amdgcn.flat.prefetch(ptr %gep2, i32 0)
  ret void
}

; Cache policy 8 selects scope:SCOPE_SE (default temporal hint).
define amdgpu_ps void @flat_prefetch_se(ptr %ptr) {
; GCN-LABEL: flat_prefetch_se:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: flat_prefetch_b8 v[0:1] scope:SCOPE_SE
; GCN-NEXT: s_endpgm
entry:
  tail call void @llvm.amdgcn.flat.prefetch(ptr %ptr, i32 8)
  ret void
}

; Cache policy 9 selects th:TH_LOAD_NT with scope:SCOPE_SE.
define amdgpu_ps void @flat_prefetch_se_nt(ptr %ptr) {
; GCN-LABEL: flat_prefetch_se_nt:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: flat_prefetch_b8 v[0:1] th:TH_LOAD_NT scope:SCOPE_SE
; GCN-NEXT: s_endpgm
entry:
  tail call void @llvm.amdgcn.flat.prefetch(ptr %ptr, i32 9)
  ret void
}

; Cache policy 18 selects th:TH_LOAD_HT with scope:SCOPE_DEV.
define amdgpu_ps void @flat_prefetch_dev_ht(ptr %ptr) {
; GCN-LABEL: flat_prefetch_dev_ht:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: flat_prefetch_b8 v[0:1] th:TH_LOAD_HT scope:SCOPE_DEV
; GCN-NEXT: s_endpgm
entry:
  tail call void @llvm.amdgcn.flat.prefetch(ptr %ptr, i32 18)
  ret void
}

; Cache policy 27 selects th:TH_LOAD_BYPASS with scope:SCOPE_SYS.
define amdgpu_ps void @flat_prefetch_sys_lu(ptr %ptr) {
; GCN-LABEL: flat_prefetch_sys_lu:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: flat_prefetch_b8 v[0:1] th:TH_LOAD_BYPASS scope:SCOPE_SYS
; GCN-NEXT: s_endpgm
entry:
  tail call void @llvm.amdgcn.flat.prefetch(ptr %ptr, i32 27)
  ret void
}