
; We have a choice between a scalar and a vector prefetch for a uniform pointer. Since we do not have scalar stores, our scalar cache is practically read-only. The rw argument of the prefetch intrinsic is used to force a vector operation even in the uniform case. On GFX12 a scalar prefetch will be used anyway; it is still useful, but it will only bring data into L2.
787 lines
30 KiB
LLVM
787 lines
30 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; Run matrix: SelectionDAG (-global-isel=0) and GlobalISel (-global-isel=1),
; each on gfx1250 (default, +safe-smem-prefetch, +safe-cu-prefetch),
; gfx1200 (default, +safe-smem-prefetch) and gfx1100 (default).
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GCN,GFX1250,GL2-ONLY %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 -mattr=+safe-smem-prefetch < %s | FileCheck --check-prefixes=GCN,SPREFETCH,GFX1250-SPREFETCH,GFX1250-SPREFETCH-SDAG %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 -mattr=+safe-cu-prefetch < %s | FileCheck --check-prefixes=GCN,GFX1250,SAFE-CU %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GCN,NOSPREFETCH %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=+safe-smem-prefetch < %s | FileCheck --check-prefixes=GCN,SPREFETCH,GFX12-SPREFETCH,SPREFETCH-SDAG %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GCN,NOSPREFETCH %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GCN,GFX1250,GL2-ONLY %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 -mattr=+safe-smem-prefetch < %s | FileCheck --check-prefixes=GCN,SPREFETCH,GFX1250-SPREFETCH,GFX1250-SPREFETCH-GISEL %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 -mattr=+safe-cu-prefetch < %s | FileCheck --check-prefixes=GCN,GFX1250,SAFE-CU %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GCN,NOSPREFETCH %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -mattr=+safe-smem-prefetch < %s | FileCheck --check-prefixes=GCN,SPREFETCH,GFX12-SPREFETCH,SPREFETCH-GISEL %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GCN,NOSPREFETCH %s
|
|
|
|
; Scalar data prefetch
|
|
|
|
; Data prefetch (cache type 1, rw 0, locality 0) through a uniform (inreg)
; address-space-4 pointer with no offset.
; Checked results: s_prefetch_data when +safe-smem-prefetch is set;
; global_prefetch_b8 with a zero VGPR offset on plain gfx1250; only s_endpgm
; on gfx1200/gfx1100 without the feature.
define amdgpu_ps void @prefetch_data_sgpr(ptr addrspace(4) inreg %ptr) {
; GFX1250-LABEL: prefetch_data_sgpr:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: global_prefetch_b8 v0, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
;
; SPREFETCH-LABEL: prefetch_data_sgpr:
; SPREFETCH: ; %bb.0: ; %entry
; SPREFETCH-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
; SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_sgpr:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
entry:
  tail call void @llvm.prefetch.p4(ptr addrspace(4) %ptr, i32 0, i32 0, i32 1)
  ret void
}
|
|
|
|
; Same as the no-offset uniform data prefetch, but the address is advanced by
; 128 floats (512 bytes). The checks expect the constant to be folded into the
; instruction offset (offset:512 on the vector form, 0x200 on the scalar form).
define amdgpu_ps void @prefetch_data_sgpr_offset(ptr addrspace(4) inreg %ptr) {
; GFX1250-LABEL: prefetch_data_sgpr_offset:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: global_prefetch_b8 v0, s[0:1] offset:512 scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
;
; SPREFETCH-LABEL: prefetch_data_sgpr_offset:
; SPREFETCH: ; %bb.0: ; %entry
; SPREFETCH-NEXT: s_prefetch_data s[0:1], 0x200, null, 0
; SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_sgpr_offset:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
entry:
  %gep = getelementptr float, ptr addrspace(4) %ptr, i32 128
  tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 1)
  ret void
}
|
|
|
|
; Check large offsets
|
|
|
|
; Uniform data prefetch with a byte offset of 8388607 (0x7fffff). The checks
; expect this value to still be encoded directly as the instruction offset on
; both the vector and the scalar form.
define amdgpu_ps void @prefetch_data_sgpr_max_offset(ptr addrspace(4) inreg %ptr) {
; GFX1250-LABEL: prefetch_data_sgpr_max_offset:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: global_prefetch_b8 v0, s[0:1] offset:8388607 scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
;
; SPREFETCH-LABEL: prefetch_data_sgpr_max_offset:
; SPREFETCH: ; %bb.0: ; %entry
; SPREFETCH-NEXT: s_prefetch_data s[0:1], 0x7fffff, null, 0
; SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_sgpr_max_offset:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
entry:
  %gep = getelementptr i8, ptr addrspace(4) %ptr, i32 8388607
  tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 1)
  ret void
}
|
|
|
|
; Uniform data prefetch with a byte offset of -8388608. The vector form on
; plain gfx1250 encodes it as offset:-8388608. The scalar form does not: the
; checks expect the base to be adjusted with SALU adds first and
; s_prefetch_data to be emitted with offset 0x0. The add sequence differs
; between SelectionDAG and GlobalISel, hence the separate prefixes.
define amdgpu_ps void @prefetch_data_sgpr_min_offset(ptr addrspace(4) inreg %ptr) {
; GFX1250-LABEL: prefetch_data_sgpr_min_offset:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: global_prefetch_b8 v0, s[0:1] offset:-8388608 scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-SDAG-LABEL: prefetch_data_sgpr_min_offset:
; GFX1250-SPREFETCH-SDAG: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-SDAG-NEXT: s_mov_b64 s[2:3], lit64(0xffffffffff800000)
; GFX1250-SPREFETCH-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-SPREFETCH-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3]
; GFX1250-SPREFETCH-SDAG-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
; GFX1250-SPREFETCH-SDAG-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_sgpr_min_offset:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
;
; SPREFETCH-SDAG-LABEL: prefetch_data_sgpr_min_offset:
; SPREFETCH-SDAG: ; %bb.0: ; %entry
; SPREFETCH-SDAG-NEXT: s_mov_b32 s2, 0xff800000
; SPREFETCH-SDAG-NEXT: s_mov_b32 s3, -1
; SPREFETCH-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; SPREFETCH-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3]
; SPREFETCH-SDAG-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
; SPREFETCH-SDAG-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-GISEL-LABEL: prefetch_data_sgpr_min_offset:
; GFX1250-SPREFETCH-GISEL: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-GISEL-NEXT: s_add_co_u32 s0, s0, 0xff800000
; GFX1250-SPREFETCH-GISEL-NEXT: s_add_co_ci_u32 s1, s1, -1
; GFX1250-SPREFETCH-GISEL-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
; GFX1250-SPREFETCH-GISEL-NEXT: s_endpgm
;
; SPREFETCH-GISEL-LABEL: prefetch_data_sgpr_min_offset:
; SPREFETCH-GISEL: ; %bb.0: ; %entry
; SPREFETCH-GISEL-NEXT: s_add_co_u32 s0, s0, 0xff800000
; SPREFETCH-GISEL-NEXT: s_add_co_ci_u32 s1, s1, -1
; SPREFETCH-GISEL-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
; SPREFETCH-GISEL-NEXT: s_endpgm
entry:
  %gep = getelementptr i8, ptr addrspace(4) %ptr, i32 -8388608
  tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 1)
  ret void
}
|
|
|
|
; Uniform data prefetch with a byte offset of 8388608 (0x800000), one more
; than the value used by the max-offset test. The checks expect the constant
; not to be encoded as an instruction offset: plain gfx1250 moves it into the
; VGPR offset operand, and the scalar path adds it to the SGPR base before
; issuing s_prefetch_data with offset 0x0.
define amdgpu_ps void @prefetch_data_sgpr_too_large_offset(ptr addrspace(4) inreg %ptr) {
; GFX1250-LABEL: prefetch_data_sgpr_too_large_offset:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v0, 0x800000
; GFX1250-NEXT: global_prefetch_b8 v0, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-SDAG-LABEL: prefetch_data_sgpr_too_large_offset:
; GFX1250-SPREFETCH-SDAG: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], 0x800000
; GFX1250-SPREFETCH-SDAG-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
; GFX1250-SPREFETCH-SDAG-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_sgpr_too_large_offset:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
;
; SPREFETCH-SDAG-LABEL: prefetch_data_sgpr_too_large_offset:
; SPREFETCH-SDAG: ; %bb.0: ; %entry
; SPREFETCH-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], 0x800000
; SPREFETCH-SDAG-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
; SPREFETCH-SDAG-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-GISEL-LABEL: prefetch_data_sgpr_too_large_offset:
; GFX1250-SPREFETCH-GISEL: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-GISEL-NEXT: s_add_co_u32 s0, s0, 0x800000
; GFX1250-SPREFETCH-GISEL-NEXT: s_add_co_ci_u32 s1, s1, 0
; GFX1250-SPREFETCH-GISEL-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
; GFX1250-SPREFETCH-GISEL-NEXT: s_endpgm
;
; SPREFETCH-GISEL-LABEL: prefetch_data_sgpr_too_large_offset:
; SPREFETCH-GISEL: ; %bb.0: ; %entry
; SPREFETCH-GISEL-NEXT: s_add_co_u32 s0, s0, 0x800000
; SPREFETCH-GISEL-NEXT: s_add_co_ci_u32 s1, s1, 0
; SPREFETCH-GISEL-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
; SPREFETCH-GISEL-NEXT: s_endpgm
entry:
  %gep = getelementptr i8, ptr addrspace(4) %ptr, i32 8388608
  tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 1)
  ret void
}
|
|
|
|
; Check divergent address
|
|
|
|
; Data prefetch through a non-inreg (VGPR) address-space-1 pointer.
; Checked results: gfx1250 emits global_prefetch_b8 on v[0:1] with or without
; +safe-smem-prefetch; every gfx1200/gfx1100 configuration emits only s_endpgm.
define amdgpu_ps void @prefetch_data_vgpr_global(ptr addrspace(1) %ptr) {
; GFX1250-LABEL: prefetch_data_vgpr_global:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_prefetch_b8 v[0:1], off scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-LABEL: prefetch_data_vgpr_global:
; GFX1250-SPREFETCH: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-NEXT: global_prefetch_b8 v[0:1], off scope:SCOPE_SYS
; GFX1250-SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_vgpr_global:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
;
; GFX12-SPREFETCH-LABEL: prefetch_data_vgpr_global:
; GFX12-SPREFETCH: ; %bb.0: ; %entry
; GFX12-SPREFETCH-NEXT: s_endpgm
entry:
  tail call void @llvm.prefetch.p1(ptr addrspace(1) %ptr, i32 0, i32 0, i32 1)
  ret void
}
|
|
|
|
; Data prefetch through a non-inreg (VGPR) generic pointer.
; Checked results: gfx1250 emits flat_prefetch_b8 on v[0:1] with or without
; +safe-smem-prefetch; every gfx1200/gfx1100 configuration emits only s_endpgm.
define amdgpu_ps void @prefetch_data_vgpr_flat(ptr %ptr) {
; GFX1250-LABEL: prefetch_data_vgpr_flat:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: flat_prefetch_b8 v[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-LABEL: prefetch_data_vgpr_flat:
; GFX1250-SPREFETCH: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-NEXT: flat_prefetch_b8 v[0:1] scope:SCOPE_SYS
; GFX1250-SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_vgpr_flat:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
;
; GFX12-SPREFETCH-LABEL: prefetch_data_vgpr_flat:
; GFX12-SPREFETCH: ; %bb.0: ; %entry
; GFX12-SPREFETCH-NEXT: s_endpgm
entry:
  tail call void @llvm.prefetch.pf(ptr %ptr, i32 0, i32 0, i32 1)
  ret void
}
|
|
|
|
; Data prefetch of a uniform (inreg) address-space-1 base plus a non-inreg
; 32-bit offset. On gfx1250 the checks expect the SGPR-base form
; "global_prefetch_b8 v0, s[0:1]"; gfx1200/gfx1100 emit only s_endpgm.
; The check lines for the GFX12 and GFX11 prefixes were dropped because no
; run line of this file enables those prefixes.
define amdgpu_ps void @prefetch_data_sgpr_vgpr_offset_global(ptr addrspace(1) inreg %ptr, i32 %offset) {
; GFX1250-LABEL: prefetch_data_sgpr_vgpr_offset_global:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_prefetch_b8 v0, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-LABEL: prefetch_data_sgpr_vgpr_offset_global:
; GFX1250-SPREFETCH: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-NEXT: global_prefetch_b8 v0, s[0:1] scope:SCOPE_SYS
; GFX1250-SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_sgpr_vgpr_offset_global:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
;
; GFX12-SPREFETCH-LABEL: prefetch_data_sgpr_vgpr_offset_global:
; GFX12-SPREFETCH: ; %bb.0: ; %entry
; GFX12-SPREFETCH-NEXT: s_endpgm
entry:
  %gep = getelementptr i8, ptr addrspace(1) %ptr, i32 %offset
  tail call void @llvm.prefetch.p1(ptr addrspace(1) %gep, i32 0, i32 0, i32 1)
  ret void
}
|
|
|
|
; Data prefetch of a uniform (inreg) generic base plus a non-inreg 32-bit
; offset plus a constant 128. On gfx1250 the checks expect
; "flat_prefetch_b8 v0, s[0:1] offset:128"; gfx1200/gfx1100 emit only s_endpgm.
; The check lines for the GFX12 and GFX11 prefixes were dropped because no
; run line of this file enables those prefixes.
define amdgpu_ps void @prefetch_data_sgpr_vgpr_offset_flat(ptr inreg %ptr, i32 %offset) {
; GFX1250-LABEL: prefetch_data_sgpr_vgpr_offset_flat:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: flat_prefetch_b8 v0, s[0:1] offset:128 scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-LABEL: prefetch_data_sgpr_vgpr_offset_flat:
; GFX1250-SPREFETCH: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-NEXT: flat_prefetch_b8 v0, s[0:1] offset:128 scope:SCOPE_SYS
; GFX1250-SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_sgpr_vgpr_offset_flat:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
;
; GFX12-SPREFETCH-LABEL: prefetch_data_sgpr_vgpr_offset_flat:
; GFX12-SPREFETCH: ; %bb.0: ; %entry
; GFX12-SPREFETCH-NEXT: s_endpgm
entry:
  %gep1 = getelementptr i8, ptr %ptr, i32 %offset
  %gep2 = getelementptr i8, ptr %gep1, i32 128
  tail call void @llvm.prefetch.pf(ptr %gep2, i32 0, i32 0, i32 1)
  ret void
}
|
|
|
|
; Check LDS and scratch: neither can be prefetched.
|
|
|
|
; Data prefetch through an address-space-3 (LDS) pointer. Every run
; configuration is expected to drop the prefetch and emit only s_endpgm.
define amdgpu_ps void @prefetch_data_lds(ptr addrspace(3) inreg %ptr) {
; GCN-LABEL: prefetch_data_lds:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_endpgm
entry:
  tail call void @llvm.prefetch.p3(ptr addrspace(3) %ptr, i32 0, i32 0, i32 1)
  ret void
}
|
|
|
|
; Data prefetch through an address-space-5 (scratch) pointer. Every run
; configuration is expected to drop the prefetch and emit only s_endpgm.
define amdgpu_ps void @prefetch_data_scratch(ptr addrspace(5) inreg %ptr) {
; GCN-LABEL: prefetch_data_scratch:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_endpgm
entry:
  tail call void @llvm.prefetch.p5(ptr addrspace(5) %ptr, i32 0, i32 0, i32 1)
  ret void
}
|
|
|
|
; Check supported address spaces
|
|
|
|
; Data prefetch (rw 0) through a uniform (inreg) generic pointer.
; Checked results: s_prefetch_data when +safe-smem-prefetch is set;
; flat_prefetch_b8 with a zero VGPR offset on plain gfx1250; only s_endpgm on
; gfx1200/gfx1100 without the feature.
define amdgpu_ps void @prefetch_data_sgpr_flat(ptr inreg %ptr) {
; GFX1250-LABEL: prefetch_data_sgpr_flat:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: flat_prefetch_b8 v0, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
;
; SPREFETCH-LABEL: prefetch_data_sgpr_flat:
; SPREFETCH: ; %bb.0: ; %entry
; SPREFETCH-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
; SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_sgpr_flat:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
entry:
  tail call void @llvm.prefetch.pf(ptr %ptr, i32 0, i32 0, i32 1)
  ret void
}
|
|
|
|
; Data prefetch (rw 0) through a uniform (inreg) address-space-1 pointer.
; Checked results: s_prefetch_data when +safe-smem-prefetch is set;
; global_prefetch_b8 with a zero VGPR offset on plain gfx1250; only s_endpgm
; on gfx1200/gfx1100 without the feature.
define amdgpu_ps void @prefetch_data_sgpr_global(ptr addrspace(1) inreg %ptr) {
; GFX1250-LABEL: prefetch_data_sgpr_global:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: global_prefetch_b8 v0, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
;
; SPREFETCH-LABEL: prefetch_data_sgpr_global:
; SPREFETCH: ; %bb.0: ; %entry
; SPREFETCH-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
; SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_sgpr_global:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
entry:
  tail call void @llvm.prefetch.p1(ptr addrspace(1) %ptr, i32 0, i32 0, i32 1)
  ret void
}
|
|
|
|
; Data prefetch through a uniform (inreg) address-space-6 pointer.
; Checked results: with +safe-smem-prefetch the high half of the address pair
; is set to 0 (s_mov_b32 s1, 0) before s_prefetch_data; plain gfx1250 and
; gfx1200/gfx1100 without the feature emit only s_endpgm.
define amdgpu_ps void @prefetch_data_sgpr_constant_32bit(ptr addrspace(6) inreg %ptr) {
; GFX1250-LABEL: prefetch_data_sgpr_constant_32bit:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
;
; SPREFETCH-LABEL: prefetch_data_sgpr_constant_32bit:
; SPREFETCH: ; %bb.0: ; %entry
; SPREFETCH-NEXT: s_mov_b32 s1, 0
; SPREFETCH-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
; SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_sgpr_constant_32bit:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
entry:
  tail call void @llvm.prefetch.p6(ptr addrspace(6) %ptr, i32 0, i32 0, i32 1)
  ret void
}
|
|
|
|
; I$ prefetch
|
|
|
|
; Instruction-cache prefetch (cache type 0) through a uniform (inreg)
; address-space-4 pointer with no offset.
; Checked results: s_prefetch_inst when +safe-smem-prefetch is set; only
; s_endpgm in every other configuration, including plain gfx1250.
define amdgpu_ps void @prefetch_inst_sgpr(ptr addrspace(4) inreg %ptr) {
; GFX1250-LABEL: prefetch_inst_sgpr:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
;
; SPREFETCH-LABEL: prefetch_inst_sgpr:
; SPREFETCH: ; %bb.0: ; %entry
; SPREFETCH-NEXT: s_prefetch_inst s[0:1], 0x0, null, 0
; SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_inst_sgpr:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
entry:
  tail call void @llvm.prefetch.p4(ptr addrspace(4) %ptr, i32 0, i32 0, i32 0)
  ret void
}
|
|
|
|
; Instruction-cache prefetch of a uniform pointer advanced by 128 bytes. With
; +safe-smem-prefetch the checks expect the constant to be folded into the
; s_prefetch_inst offset (0x80); other configurations emit only s_endpgm.
define amdgpu_ps void @prefetch_inst_sgpr_offset(ptr addrspace(4) inreg %ptr) {
; GFX1250-LABEL: prefetch_inst_sgpr_offset:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
;
; SPREFETCH-LABEL: prefetch_inst_sgpr_offset:
; SPREFETCH: ; %bb.0: ; %entry
; SPREFETCH-NEXT: s_prefetch_inst s[0:1], 0x80, null, 0
; SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_inst_sgpr_offset:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
entry:
  %gep = getelementptr i8, ptr addrspace(4) %ptr, i32 128
  tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 0)
  ret void
}
|
|
|
|
; Check large offsets
|
|
|
|
; Instruction-cache prefetch with a byte offset of 8388607 (0x7fffff). With
; +safe-smem-prefetch the checks expect the value to be encoded directly as the
; s_prefetch_inst offset; other configurations emit only s_endpgm.
define amdgpu_ps void @prefetch_inst_sgpr_max_offset(ptr addrspace(4) inreg %ptr) {
; GFX1250-LABEL: prefetch_inst_sgpr_max_offset:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
;
; SPREFETCH-LABEL: prefetch_inst_sgpr_max_offset:
; SPREFETCH: ; %bb.0: ; %entry
; SPREFETCH-NEXT: s_prefetch_inst s[0:1], 0x7fffff, null, 0
; SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_inst_sgpr_max_offset:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
entry:
  %gep = getelementptr i8, ptr addrspace(4) %ptr, i32 8388607
  tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 0)
  ret void
}
|
|
|
|
; Instruction-cache prefetch with a byte offset of -8388608. With
; +safe-smem-prefetch the checks expect the negative constant to be added to
; the SGPR base first and s_prefetch_inst to be emitted with offset 0x0. The
; add sequence differs between SelectionDAG and GlobalISel. Other
; configurations emit only s_endpgm.
define amdgpu_ps void @prefetch_inst_sgpr_min_offset(ptr addrspace(4) inreg %ptr) {
; GFX1250-LABEL: prefetch_inst_sgpr_min_offset:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-SDAG-LABEL: prefetch_inst_sgpr_min_offset:
; GFX1250-SPREFETCH-SDAG: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-SDAG-NEXT: s_mov_b64 s[2:3], lit64(0xffffffffff800000)
; GFX1250-SPREFETCH-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-SPREFETCH-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3]
; GFX1250-SPREFETCH-SDAG-NEXT: s_prefetch_inst s[0:1], 0x0, null, 0
; GFX1250-SPREFETCH-SDAG-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_inst_sgpr_min_offset:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
;
; SPREFETCH-SDAG-LABEL: prefetch_inst_sgpr_min_offset:
; SPREFETCH-SDAG: ; %bb.0: ; %entry
; SPREFETCH-SDAG-NEXT: s_mov_b32 s2, 0xff800000
; SPREFETCH-SDAG-NEXT: s_mov_b32 s3, -1
; SPREFETCH-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; SPREFETCH-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3]
; SPREFETCH-SDAG-NEXT: s_prefetch_inst s[0:1], 0x0, null, 0
; SPREFETCH-SDAG-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-GISEL-LABEL: prefetch_inst_sgpr_min_offset:
; GFX1250-SPREFETCH-GISEL: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-GISEL-NEXT: s_add_co_u32 s0, s0, 0xff800000
; GFX1250-SPREFETCH-GISEL-NEXT: s_add_co_ci_u32 s1, s1, -1
; GFX1250-SPREFETCH-GISEL-NEXT: s_prefetch_inst s[0:1], 0x0, null, 0
; GFX1250-SPREFETCH-GISEL-NEXT: s_endpgm
;
; SPREFETCH-GISEL-LABEL: prefetch_inst_sgpr_min_offset:
; SPREFETCH-GISEL: ; %bb.0: ; %entry
; SPREFETCH-GISEL-NEXT: s_add_co_u32 s0, s0, 0xff800000
; SPREFETCH-GISEL-NEXT: s_add_co_ci_u32 s1, s1, -1
; SPREFETCH-GISEL-NEXT: s_prefetch_inst s[0:1], 0x0, null, 0
; SPREFETCH-GISEL-NEXT: s_endpgm
entry:
  %gep = getelementptr i8, ptr addrspace(4) %ptr, i32 -8388608
  tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 0)
  ret void
}
|
|
|
|
; Instruction-cache prefetch with a byte offset of 8388608 (0x800000), one
; more than the value used by the max-offset test. With +safe-smem-prefetch the
; checks expect the constant to be added to the SGPR base and s_prefetch_inst
; to be emitted with offset 0x0; other configurations emit only s_endpgm.
define amdgpu_ps void @prefetch_inst_sgpr_too_large_offset(ptr addrspace(4) inreg %ptr) {
; GFX1250-LABEL: prefetch_inst_sgpr_too_large_offset:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-SDAG-LABEL: prefetch_inst_sgpr_too_large_offset:
; GFX1250-SPREFETCH-SDAG: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], 0x800000
; GFX1250-SPREFETCH-SDAG-NEXT: s_prefetch_inst s[0:1], 0x0, null, 0
; GFX1250-SPREFETCH-SDAG-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_inst_sgpr_too_large_offset:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
;
; SPREFETCH-SDAG-LABEL: prefetch_inst_sgpr_too_large_offset:
; SPREFETCH-SDAG: ; %bb.0: ; %entry
; SPREFETCH-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], 0x800000
; SPREFETCH-SDAG-NEXT: s_prefetch_inst s[0:1], 0x0, null, 0
; SPREFETCH-SDAG-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-GISEL-LABEL: prefetch_inst_sgpr_too_large_offset:
; GFX1250-SPREFETCH-GISEL: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-GISEL-NEXT: s_add_co_u32 s0, s0, 0x800000
; GFX1250-SPREFETCH-GISEL-NEXT: s_add_co_ci_u32 s1, s1, 0
; GFX1250-SPREFETCH-GISEL-NEXT: s_prefetch_inst s[0:1], 0x0, null, 0
; GFX1250-SPREFETCH-GISEL-NEXT: s_endpgm
;
; SPREFETCH-GISEL-LABEL: prefetch_inst_sgpr_too_large_offset:
; SPREFETCH-GISEL: ; %bb.0: ; %entry
; SPREFETCH-GISEL-NEXT: s_add_co_u32 s0, s0, 0x800000
; SPREFETCH-GISEL-NEXT: s_add_co_ci_u32 s1, s1, 0
; SPREFETCH-GISEL-NEXT: s_prefetch_inst s[0:1], 0x0, null, 0
; SPREFETCH-GISEL-NEXT: s_endpgm
entry:
  %gep = getelementptr i8, ptr addrspace(4) %ptr, i32 8388608
  tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 0)
  ret void
}
|
|
|
|
; Check cache locality
|
|
|
|
; Data prefetch of a VGPR generic pointer with locality argument 1. On gfx1250
; the checks expect flat_prefetch_b8 with scope:SCOPE_DEV; gfx1200/gfx1100 emit
; only s_endpgm.
define amdgpu_ps void @prefetch_data_vgpr_flat_dev(ptr %ptr) {
; GFX1250-LABEL: prefetch_data_vgpr_flat_dev:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: flat_prefetch_b8 v[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-LABEL: prefetch_data_vgpr_flat_dev:
; GFX1250-SPREFETCH: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-NEXT: flat_prefetch_b8 v[0:1] scope:SCOPE_DEV
; GFX1250-SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_vgpr_flat_dev:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
;
; GFX12-SPREFETCH-LABEL: prefetch_data_vgpr_flat_dev:
; GFX12-SPREFETCH: ; %bb.0: ; %entry
; GFX12-SPREFETCH-NEXT: s_endpgm
entry:
  tail call void @llvm.prefetch.pf(ptr %ptr, i32 0, i32 1, i32 1)
  ret void
}
|
|
|
|
; Data prefetch of a VGPR generic pointer with locality argument 2. On gfx1250
; the checks expect flat_prefetch_b8 with scope:SCOPE_SE; gfx1200/gfx1100 emit
; only s_endpgm.
define amdgpu_ps void @prefetch_data_vgpr_flat_se(ptr %ptr) {
; GFX1250-LABEL: prefetch_data_vgpr_flat_se:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: flat_prefetch_b8 v[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-LABEL: prefetch_data_vgpr_flat_se:
; GFX1250-SPREFETCH: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-NEXT: flat_prefetch_b8 v[0:1] scope:SCOPE_SE
; GFX1250-SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_vgpr_flat_se:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
;
; GFX12-SPREFETCH-LABEL: prefetch_data_vgpr_flat_se:
; GFX12-SPREFETCH: ; %bb.0: ; %entry
; GFX12-SPREFETCH-NEXT: s_endpgm
entry:
  tail call void @llvm.prefetch.pf(ptr %ptr, i32 0, i32 2, i32 1)
  ret void
}
|
|
|
|
; Data prefetch of a VGPR generic pointer with locality argument 3. On gfx1250
; without +safe-cu-prefetch the checks expect scope:SCOPE_SE (same as locality
; 2); with +safe-cu-prefetch they expect flat_prefetch_b8 with no scope
; modifier. gfx1200/gfx1100 emit only s_endpgm.
define amdgpu_ps void @prefetch_data_vgpr_flat_cu(ptr %ptr) {
; GL2-ONLY-LABEL: prefetch_data_vgpr_flat_cu:
; GL2-ONLY: ; %bb.0: ; %entry
; GL2-ONLY-NEXT: flat_prefetch_b8 v[0:1] scope:SCOPE_SE
; GL2-ONLY-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-LABEL: prefetch_data_vgpr_flat_cu:
; GFX1250-SPREFETCH: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-NEXT: flat_prefetch_b8 v[0:1] scope:SCOPE_SE
; GFX1250-SPREFETCH-NEXT: s_endpgm
;
; SAFE-CU-LABEL: prefetch_data_vgpr_flat_cu:
; SAFE-CU: ; %bb.0: ; %entry
; SAFE-CU-NEXT: flat_prefetch_b8 v[0:1]
; SAFE-CU-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_vgpr_flat_cu:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
;
; GFX12-SPREFETCH-LABEL: prefetch_data_vgpr_flat_cu:
; GFX12-SPREFETCH: ; %bb.0: ; %entry
; GFX12-SPREFETCH-NEXT: s_endpgm
entry:
  tail call void @llvm.prefetch.pf(ptr %ptr, i32 0, i32 3, i32 1)
  ret void
}
|
|
|
|
; flat offset
|
|
|
|
; Data prefetch of a VGPR generic pointer advanced by 128 floats (512 bytes).
; On gfx1250 the checks expect the constant folded into flat_prefetch_b8 as
; offset:512; gfx1200/gfx1100 emit only s_endpgm.
define amdgpu_ps void @prefetch_data_vgpr_flat_offset(ptr %ptr) {
; GFX1250-LABEL: prefetch_data_vgpr_flat_offset:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: flat_prefetch_b8 v[0:1] offset:512 scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-LABEL: prefetch_data_vgpr_flat_offset:
; GFX1250-SPREFETCH: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-NEXT: flat_prefetch_b8 v[0:1] offset:512 scope:SCOPE_SYS
; GFX1250-SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_vgpr_flat_offset:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
;
; GFX12-SPREFETCH-LABEL: prefetch_data_vgpr_flat_offset:
; GFX12-SPREFETCH: ; %bb.0: ; %entry
; GFX12-SPREFETCH-NEXT: s_endpgm
entry:
  %gep = getelementptr float, ptr %ptr, i32 128
  tail call void @llvm.prefetch.pf(ptr %gep, i32 0, i32 0, i32 1)
  ret void
}
|
|
|
|
; Data prefetch of a VGPR address-space-1 pointer advanced by 128 floats
; (512 bytes). On gfx1250 the checks expect the constant folded into
; global_prefetch_b8 as offset:512; gfx1200/gfx1100 emit only s_endpgm.
define amdgpu_ps void @prefetch_data_vgpr_global_offset(ptr addrspace(1) %ptr) {
; GFX1250-LABEL: prefetch_data_vgpr_global_offset:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_prefetch_b8 v[0:1], off offset:512 scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-LABEL: prefetch_data_vgpr_global_offset:
; GFX1250-SPREFETCH: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-NEXT: global_prefetch_b8 v[0:1], off offset:512 scope:SCOPE_SYS
; GFX1250-SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_vgpr_global_offset:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
;
; GFX12-SPREFETCH-LABEL: prefetch_data_vgpr_global_offset:
; GFX12-SPREFETCH: ; %bb.0: ; %entry
; GFX12-SPREFETCH-NEXT: s_endpgm
entry:
  %gep = getelementptr float, ptr addrspace(1) %ptr, i32 128
  tail call void @llvm.prefetch.p1(ptr addrspace(1) %gep, i32 0, i32 0, i32 1)
  ret void
}
|
|
|
|
; Data prefetch of a uniform (inreg) address-space-1 base plus a non-inreg
; 32-bit offset. On gfx1250 the checks expect the SGPR-base addressing form
; "global_prefetch_b8 v0, s[0:1]"; gfx1200/gfx1100 emit only s_endpgm.
define amdgpu_ps void @prefetch_data_vgpr_global_saddr(ptr addrspace(1) inreg %ptr, i32 %voffset) {
; GFX1250-LABEL: prefetch_data_vgpr_global_saddr:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_prefetch_b8 v0, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-LABEL: prefetch_data_vgpr_global_saddr:
; GFX1250-SPREFETCH: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-NEXT: global_prefetch_b8 v0, s[0:1] scope:SCOPE_SYS
; GFX1250-SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_vgpr_global_saddr:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
;
; GFX12-SPREFETCH-LABEL: prefetch_data_vgpr_global_saddr:
; GFX12-SPREFETCH: ; %bb.0: ; %entry
; GFX12-SPREFETCH-NEXT: s_endpgm
entry:
  %gep = getelementptr i8, ptr addrspace(1) %ptr, i32 %voffset
  tail call void @llvm.prefetch.p1(ptr addrspace(1) %gep, i32 0, i32 0, i32 1)
  ret void
}
|
|
|
|
; Data prefetch of a uniform (inreg) address-space-1 base plus a non-inreg
; 32-bit offset plus a constant 128. On gfx1250 the checks expect
; "global_prefetch_b8 v0, s[0:1] offset:128"; gfx1200/gfx1100 emit only
; s_endpgm.
define amdgpu_ps void @prefetch_data_vgpr_global_saddr_offset(ptr addrspace(1) inreg %ptr, i32 %voffset) {
; GFX1250-LABEL: prefetch_data_vgpr_global_saddr_offset:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_prefetch_b8 v0, s[0:1] offset:128 scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-LABEL: prefetch_data_vgpr_global_saddr_offset:
; GFX1250-SPREFETCH: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-NEXT: global_prefetch_b8 v0, s[0:1] offset:128 scope:SCOPE_SYS
; GFX1250-SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_vgpr_global_saddr_offset:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
;
; GFX12-SPREFETCH-LABEL: prefetch_data_vgpr_global_saddr_offset:
; GFX12-SPREFETCH: ; %bb.0: ; %entry
; GFX12-SPREFETCH-NEXT: s_endpgm
entry:
  %gep1 = getelementptr i8, ptr addrspace(1) %ptr, i32 %voffset
  %gep2 = getelementptr i8, ptr addrspace(1) %gep1, i32 128
  tail call void @llvm.prefetch.p1(ptr addrspace(1) %gep2, i32 0, i32 0, i32 1)
  ret void
}
|
|
|
|
; Cannot prefetch I$ with flat or global instructions.
|
|
|
|
; Instruction-cache prefetch (cache type 0) through a VGPR address-space-1
; pointer. Every run configuration is expected to emit only s_endpgm.
define amdgpu_ps void @prefetch_inst_vgpr_global(ptr addrspace(1) %ptr) {
; GCN-LABEL: prefetch_inst_vgpr_global:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_endpgm
entry:
  tail call void @llvm.prefetch.p1(ptr addrspace(1) %ptr, i32 0, i32 0, i32 0)
  ret void
}
|
|
|
|
; Instruction-cache prefetch (cache type 0) through a VGPR generic pointer.
; Every run configuration is expected to emit only s_endpgm.
define amdgpu_ps void @prefetch_inst_vgpr_flat(ptr %ptr) {
; GCN-LABEL: prefetch_inst_vgpr_flat:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_endpgm
entry:
  tail call void @llvm.prefetch.pf(ptr %ptr, i32 0, i32 0, i32 0)
  ret void
}
|
|
|
|
; Force vector prefetch for uniform address with rw = 1 argument.
|
|
|
|
; Data prefetch of a uniform (inreg) generic pointer with rw = 1.
; Checked results: gfx1250 uses the vector flat_prefetch_b8 even when
; +safe-smem-prefetch is set; gfx1200 with +safe-smem-prefetch still emits
; s_prefetch_data; gfx1200/gfx1100 without the feature emit only s_endpgm.
define amdgpu_ps void @prefetch_data_sgpr_flat_force_vector(ptr inreg %ptr) {
; GFX1250-LABEL: prefetch_data_sgpr_flat_force_vector:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: flat_prefetch_b8 v0, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-LABEL: prefetch_data_sgpr_flat_force_vector:
; GFX1250-SPREFETCH: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-SPREFETCH-NEXT: flat_prefetch_b8 v0, s[0:1] scope:SCOPE_SYS
; GFX1250-SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_sgpr_flat_force_vector:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
;
; GFX12-SPREFETCH-LABEL: prefetch_data_sgpr_flat_force_vector:
; GFX12-SPREFETCH: ; %bb.0: ; %entry
; GFX12-SPREFETCH-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
; GFX12-SPREFETCH-NEXT: s_endpgm
entry:
  tail call void @llvm.prefetch.pf(ptr %ptr, i32 1, i32 0, i32 1)
  ret void
}
|
|
|
|
; Data prefetch of a uniform (inreg) address-space-1 pointer with rw = 1.
; Checked results: gfx1250 uses the vector global_prefetch_b8 even when
; +safe-smem-prefetch is set; gfx1200 with +safe-smem-prefetch still emits
; s_prefetch_data; gfx1200/gfx1100 without the feature emit only s_endpgm.
define amdgpu_ps void @prefetch_data_sgpr_global_force_vector(ptr addrspace(1) inreg %ptr) {
; GFX1250-LABEL: prefetch_data_sgpr_global_force_vector:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: global_prefetch_b8 v0, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-LABEL: prefetch_data_sgpr_global_force_vector:
; GFX1250-SPREFETCH: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-SPREFETCH-NEXT: global_prefetch_b8 v0, s[0:1] scope:SCOPE_SYS
; GFX1250-SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_sgpr_global_force_vector:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
;
; GFX12-SPREFETCH-LABEL: prefetch_data_sgpr_global_force_vector:
; GFX12-SPREFETCH: ; %bb.0: ; %entry
; GFX12-SPREFETCH-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
; GFX12-SPREFETCH-NEXT: s_endpgm
entry:
  tail call void @llvm.prefetch.p1(ptr addrspace(1) %ptr, i32 1, i32 0, i32 1)
  ret void
}
|
|
|
|
; Data prefetch of a uniform (inreg) address-space-1 pointer advanced by 1024
; bytes, with rw = 1. Checked results: gfx1250 uses global_prefetch_b8 with
; offset:1024 even when +safe-smem-prefetch is set; gfx1200 with
; +safe-smem-prefetch emits s_prefetch_data with offset 0x400; gfx1200/gfx1100
; without the feature emit only s_endpgm.
define amdgpu_ps void @prefetch_data_sgpr_global_saddr_force_vector(ptr addrspace(1) inreg %ptr) {
; GFX1250-LABEL: prefetch_data_sgpr_global_saddr_force_vector:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: global_prefetch_b8 v0, s[0:1] offset:1024 scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-LABEL: prefetch_data_sgpr_global_saddr_force_vector:
; GFX1250-SPREFETCH: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-SPREFETCH-NEXT: global_prefetch_b8 v0, s[0:1] offset:1024 scope:SCOPE_SYS
; GFX1250-SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_sgpr_global_saddr_force_vector:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
;
; GFX12-SPREFETCH-LABEL: prefetch_data_sgpr_global_saddr_force_vector:
; GFX12-SPREFETCH: ; %bb.0: ; %entry
; GFX12-SPREFETCH-NEXT: s_prefetch_data s[0:1], 0x400, null, 0
; GFX12-SPREFETCH-NEXT: s_endpgm
entry:
  %gep = getelementptr i8, ptr addrspace(1) %ptr, i32 1024
  tail call void @llvm.prefetch.p1(ptr addrspace(1) %gep, i32 1, i32 0, i32 1)
  ret void
}
|
|
|
|
; Declarations of the llvm.prefetch overloads called by the tests in this file,
; one per pointer address space used (generic, 1, 3, 4, 5 and 6).
; NOTE(review): the generic-pointer overload is spelled with a ".pf" suffix here
; and at its call sites; the canonical mangling for a plain `ptr` would
; presumably be ".p0" - confirm this relies on intrinsic name remangling.
declare void @llvm.prefetch.pf(ptr nocapture readonly, i32, i32, i32)
declare void @llvm.prefetch.p1(ptr addrspace(1) nocapture readonly, i32, i32, i32)
declare void @llvm.prefetch.p3(ptr addrspace(3) nocapture readonly, i32, i32, i32)
declare void @llvm.prefetch.p4(ptr addrspace(4) nocapture readonly, i32, i32, i32)
declare void @llvm.prefetch.p5(ptr addrspace(5) nocapture readonly, i32, i32, i32)
declare void @llvm.prefetch.p6(ptr addrspace(6) nocapture readonly, i32, i32, i32)
|