llvm-project/llvm/test/CodeGen/AMDGPU/llvm.prefetch.ll
Stanislav Mekhanoshin 96e5eed92a
[AMDGPU] Select VMEM prefetch for llvm.prefetch on gfx1250 (#150493)
We have a choice to use a scalar or vector prefetch for a uniform
pointer. Since we do not have scalar stores, our scalar cache is
practically read-only. The rw argument of the prefetch intrinsic is
used to force a vector operation even for a uniform case. On GFX12
scalar prefetch will be used anyway; it is still useful, but it will
only bring data to L2.
2025-07-24 13:22:50 -07:00

787 lines
30 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GCN,GFX1250,GL2-ONLY %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 -mattr=+safe-smem-prefetch < %s | FileCheck --check-prefixes=GCN,SPREFETCH,GFX1250-SPREFETCH,GFX1250-SPREFETCH-SDAG %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 -mattr=+safe-cu-prefetch < %s | FileCheck --check-prefixes=GCN,GFX1250,SAFE-CU %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GCN,NOSPREFETCH %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=+safe-smem-prefetch < %s | FileCheck --check-prefixes=GCN,SPREFETCH,GFX12-SPREFETCH,SPREFETCH-SDAG %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GCN,NOSPREFETCH %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GCN,GFX1250,GL2-ONLY %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 -mattr=+safe-smem-prefetch < %s | FileCheck --check-prefixes=GCN,SPREFETCH,GFX1250-SPREFETCH,GFX1250-SPREFETCH-GISEL %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 -mattr=+safe-cu-prefetch < %s | FileCheck --check-prefixes=GCN,GFX1250,SAFE-CU %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GCN,NOSPREFETCH %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -mattr=+safe-smem-prefetch < %s | FileCheck --check-prefixes=GCN,SPREFETCH,GFX12-SPREFETCH,SPREFETCH-GISEL %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GCN,NOSPREFETCH %s
; Scalar data prefetch
; Data prefetch (rw 0, locality 0, cache type 1) of an inreg addrspace(4)
; pointer with no offset. The checks expect either a vector prefetch with a
; zero in v0, a scalar s_prefetch_data, or no prefetch instruction at all,
; depending on the run configuration.
define amdgpu_ps void @prefetch_data_sgpr(ptr addrspace(4) inreg %ptr) {
; GFX1250-LABEL: prefetch_data_sgpr:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: global_prefetch_b8 v0, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
;
; SPREFETCH-LABEL: prefetch_data_sgpr:
; SPREFETCH: ; %bb.0: ; %entry
; SPREFETCH-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
; SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_sgpr:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
entry:
tail call void @llvm.prefetch.p4(ptr addrspace(4) %ptr, i32 0, i32 0, i32 1)
ret void
}
; Data prefetch of an inreg addrspace(4) pointer advanced by 128 floats. The
; expected output carries this as an immediate offset of 512 (0x200) on the
; prefetch instruction.
define amdgpu_ps void @prefetch_data_sgpr_offset(ptr addrspace(4) inreg %ptr) {
; GFX1250-LABEL: prefetch_data_sgpr_offset:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: global_prefetch_b8 v0, s[0:1] offset:512 scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
;
; SPREFETCH-LABEL: prefetch_data_sgpr_offset:
; SPREFETCH: ; %bb.0: ; %entry
; SPREFETCH-NEXT: s_prefetch_data s[0:1], 0x200, null, 0
; SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_sgpr_offset:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
entry:
%gep = getelementptr float, ptr addrspace(4) %ptr, i32 128
tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 1)
ret void
}
; Check large offsets
; Data prefetch with a byte offset of 8388607 (0x7fffff). The expected output
; still folds the whole offset into the immediate field of both the vector and
; the scalar prefetch instruction.
define amdgpu_ps void @prefetch_data_sgpr_max_offset(ptr addrspace(4) inreg %ptr) {
; GFX1250-LABEL: prefetch_data_sgpr_max_offset:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: global_prefetch_b8 v0, s[0:1] offset:8388607 scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
;
; SPREFETCH-LABEL: prefetch_data_sgpr_max_offset:
; SPREFETCH: ; %bb.0: ; %entry
; SPREFETCH-NEXT: s_prefetch_data s[0:1], 0x7fffff, null, 0
; SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_sgpr_max_offset:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
entry:
%gep = getelementptr i8, ptr addrspace(4) %ptr, i32 8388607
tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 1)
ret void
}
; Data prefetch with a byte offset of -8388608. The vector form is expected to
; fold the offset into its immediate field. The s_prefetch_data runs are
; expected to add the offset to the base pointer first and then prefetch with
; a zero immediate; the add sequence differs between SelectionDAG and
; GlobalISel and between gfx1250 and gfx1200.
define amdgpu_ps void @prefetch_data_sgpr_min_offset(ptr addrspace(4) inreg %ptr) {
; GFX1250-LABEL: prefetch_data_sgpr_min_offset:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: global_prefetch_b8 v0, s[0:1] offset:-8388608 scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-SDAG-LABEL: prefetch_data_sgpr_min_offset:
; GFX1250-SPREFETCH-SDAG: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-SDAG-NEXT: s_mov_b64 s[2:3], lit64(0xffffffffff800000)
; GFX1250-SPREFETCH-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-SPREFETCH-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3]
; GFX1250-SPREFETCH-SDAG-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
; GFX1250-SPREFETCH-SDAG-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_sgpr_min_offset:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
;
; SPREFETCH-SDAG-LABEL: prefetch_data_sgpr_min_offset:
; SPREFETCH-SDAG: ; %bb.0: ; %entry
; SPREFETCH-SDAG-NEXT: s_mov_b32 s2, 0xff800000
; SPREFETCH-SDAG-NEXT: s_mov_b32 s3, -1
; SPREFETCH-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; SPREFETCH-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3]
; SPREFETCH-SDAG-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
; SPREFETCH-SDAG-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-GISEL-LABEL: prefetch_data_sgpr_min_offset:
; GFX1250-SPREFETCH-GISEL: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-GISEL-NEXT: s_add_co_u32 s0, s0, 0xff800000
; GFX1250-SPREFETCH-GISEL-NEXT: s_add_co_ci_u32 s1, s1, -1
; GFX1250-SPREFETCH-GISEL-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
; GFX1250-SPREFETCH-GISEL-NEXT: s_endpgm
;
; SPREFETCH-GISEL-LABEL: prefetch_data_sgpr_min_offset:
; SPREFETCH-GISEL: ; %bb.0: ; %entry
; SPREFETCH-GISEL-NEXT: s_add_co_u32 s0, s0, 0xff800000
; SPREFETCH-GISEL-NEXT: s_add_co_ci_u32 s1, s1, -1
; SPREFETCH-GISEL-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
; SPREFETCH-GISEL-NEXT: s_endpgm
entry:
%gep = getelementptr i8, ptr addrspace(4) %ptr, i32 -8388608
tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 1)
ret void
}
; Data prefetch with a byte offset of 8388608 (0x800000), one more than the
; value used by the max-offset test. The expected output no longer folds the
; offset into an immediate field; it is placed in v0 for the vector form, or
; added to the base pointer before a zero-immediate s_prefetch_data.
define amdgpu_ps void @prefetch_data_sgpr_too_large_offset(ptr addrspace(4) inreg %ptr) {
; GFX1250-LABEL: prefetch_data_sgpr_too_large_offset:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v0, 0x800000
; GFX1250-NEXT: global_prefetch_b8 v0, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-SDAG-LABEL: prefetch_data_sgpr_too_large_offset:
; GFX1250-SPREFETCH-SDAG: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], 0x800000
; GFX1250-SPREFETCH-SDAG-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
; GFX1250-SPREFETCH-SDAG-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_sgpr_too_large_offset:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
;
; SPREFETCH-SDAG-LABEL: prefetch_data_sgpr_too_large_offset:
; SPREFETCH-SDAG: ; %bb.0: ; %entry
; SPREFETCH-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], 0x800000
; SPREFETCH-SDAG-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
; SPREFETCH-SDAG-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-GISEL-LABEL: prefetch_data_sgpr_too_large_offset:
; GFX1250-SPREFETCH-GISEL: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-GISEL-NEXT: s_add_co_u32 s0, s0, 0x800000
; GFX1250-SPREFETCH-GISEL-NEXT: s_add_co_ci_u32 s1, s1, 0
; GFX1250-SPREFETCH-GISEL-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
; GFX1250-SPREFETCH-GISEL-NEXT: s_endpgm
;
; SPREFETCH-GISEL-LABEL: prefetch_data_sgpr_too_large_offset:
; SPREFETCH-GISEL: ; %bb.0: ; %entry
; SPREFETCH-GISEL-NEXT: s_add_co_u32 s0, s0, 0x800000
; SPREFETCH-GISEL-NEXT: s_add_co_ci_u32 s1, s1, 0
; SPREFETCH-GISEL-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
; SPREFETCH-GISEL-NEXT: s_endpgm
entry:
%gep = getelementptr i8, ptr addrspace(4) %ptr, i32 8388608
tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 1)
ret void
}
; Check divergent address
; Data prefetch through a non-inreg addrspace(1) pointer; the checks expect the
; address in v[0:1]. Only the gfx1250 runs expect a prefetch instruction, the
; gfx1200 and gfx1100 runs expect an empty body.
define amdgpu_ps void @prefetch_data_vgpr_global(ptr addrspace(1) %ptr) {
; GFX1250-LABEL: prefetch_data_vgpr_global:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_prefetch_b8 v[0:1], off scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-LABEL: prefetch_data_vgpr_global:
; GFX1250-SPREFETCH: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-NEXT: global_prefetch_b8 v[0:1], off scope:SCOPE_SYS
; GFX1250-SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_vgpr_global:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
;
; GFX12-SPREFETCH-LABEL: prefetch_data_vgpr_global:
; GFX12-SPREFETCH: ; %bb.0: ; %entry
; GFX12-SPREFETCH-NEXT: s_endpgm
entry:
tail call void @llvm.prefetch.p1(ptr addrspace(1) %ptr, i32 0, i32 0, i32 1)
ret void
}
; Data prefetch through a non-inreg default-address-space pointer; the gfx1250
; runs expect flat_prefetch_b8 on v[0:1], the other runs expect an empty body.
define amdgpu_ps void @prefetch_data_vgpr_flat(ptr %ptr) {
; GFX1250-LABEL: prefetch_data_vgpr_flat:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: flat_prefetch_b8 v[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-LABEL: prefetch_data_vgpr_flat:
; GFX1250-SPREFETCH: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-NEXT: flat_prefetch_b8 v[0:1] scope:SCOPE_SYS
; GFX1250-SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_vgpr_flat:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
;
; GFX12-SPREFETCH-LABEL: prefetch_data_vgpr_flat:
; GFX12-SPREFETCH: ; %bb.0: ; %entry
; GFX12-SPREFETCH-NEXT: s_endpgm
entry:
tail call void @llvm.prefetch.pf(ptr %ptr, i32 0, i32 0, i32 1)
ret void
}
; Data prefetch of an inreg addrspace(1) base plus a non-inreg i32 byte offset.
; The gfx1250 runs expect the base in s[0:1] with the offset in v0; all other
; runs expect no prefetch instruction.
define amdgpu_ps void @prefetch_data_sgpr_vgpr_offset_global(ptr addrspace(1) inreg %ptr, i32 %offset) {
; GFX1250-LABEL: prefetch_data_sgpr_vgpr_offset_global:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_prefetch_b8 v0, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-LABEL: prefetch_data_sgpr_vgpr_offset_global:
; GFX1250-SPREFETCH: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-NEXT: global_prefetch_b8 v0, s[0:1] scope:SCOPE_SYS
; GFX1250-SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_sgpr_vgpr_offset_global:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
;
; GFX12-SPREFETCH-LABEL: prefetch_data_sgpr_vgpr_offset_global:
; GFX12-SPREFETCH: ; %bb.0: ; %entry
; GFX12-SPREFETCH-NEXT: s_endpgm
entry:
%gep = getelementptr i8, ptr addrspace(1) %ptr, i32 %offset
tail call void @llvm.prefetch.p1(ptr addrspace(1) %gep, i32 0, i32 0, i32 1)
ret void
}
; Data prefetch of an inreg default-address-space base plus a non-inreg i32
; byte offset plus a constant 128 bytes. The gfx1250 runs expect the base in
; s[0:1], the variable offset in v0 and the constant as the immediate offset;
; all other runs expect no prefetch instruction.
define amdgpu_ps void @prefetch_data_sgpr_vgpr_offset_flat(ptr inreg %ptr, i32 %offset) {
; GFX1250-LABEL: prefetch_data_sgpr_vgpr_offset_flat:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: flat_prefetch_b8 v0, s[0:1] offset:128 scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-LABEL: prefetch_data_sgpr_vgpr_offset_flat:
; GFX1250-SPREFETCH: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-NEXT: flat_prefetch_b8 v0, s[0:1] offset:128 scope:SCOPE_SYS
; GFX1250-SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_sgpr_vgpr_offset_flat:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
;
; GFX12-SPREFETCH-LABEL: prefetch_data_sgpr_vgpr_offset_flat:
; GFX12-SPREFETCH: ; %bb.0: ; %entry
; GFX12-SPREFETCH-NEXT: s_endpgm
entry:
%gep1 = getelementptr i8, ptr %ptr, i32 %offset
%gep2 = getelementptr i8, ptr %gep1, i32 128
tail call void @llvm.prefetch.pf(ptr %gep2, i32 0, i32 0, i32 1)
ret void
}
; Check LDS and scratch; neither can be prefetched
; Data prefetch of an addrspace(3) pointer; every run expects the call to
; produce no instruction, leaving only s_endpgm.
define amdgpu_ps void @prefetch_data_lds(ptr addrspace(3) inreg %ptr) {
; GCN-LABEL: prefetch_data_lds:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_endpgm
entry:
tail call void @llvm.prefetch.p3(ptr addrspace(3) %ptr, i32 0, i32 0, i32 1)
ret void
}
; Data prefetch of an addrspace(5) pointer; every run expects the call to
; produce no instruction, leaving only s_endpgm.
define amdgpu_ps void @prefetch_data_scratch(ptr addrspace(5) inreg %ptr) {
; GCN-LABEL: prefetch_data_scratch:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_endpgm
entry:
tail call void @llvm.prefetch.p5(ptr addrspace(5) %ptr, i32 0, i32 0, i32 1)
ret void
}
; Check supported address spaces
; Data prefetch (rw 0) of an inreg default-address-space pointer. The default
; gfx1250 runs expect flat_prefetch_b8 with a zero in v0, the runs with
; +safe-smem-prefetch expect s_prefetch_data, and the remaining runs expect no
; prefetch instruction.
define amdgpu_ps void @prefetch_data_sgpr_flat(ptr inreg %ptr) {
; GFX1250-LABEL: prefetch_data_sgpr_flat:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: flat_prefetch_b8 v0, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
;
; SPREFETCH-LABEL: prefetch_data_sgpr_flat:
; SPREFETCH: ; %bb.0: ; %entry
; SPREFETCH-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
; SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_sgpr_flat:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
entry:
tail call void @llvm.prefetch.pf(ptr %ptr, i32 0, i32 0, i32 1)
ret void
}
; Data prefetch (rw 0) of an inreg addrspace(1) pointer. The default gfx1250
; runs expect global_prefetch_b8 with a zero in v0, the runs with
; +safe-smem-prefetch expect s_prefetch_data, and the remaining runs expect no
; prefetch instruction.
define amdgpu_ps void @prefetch_data_sgpr_global(ptr addrspace(1) inreg %ptr) {
; GFX1250-LABEL: prefetch_data_sgpr_global:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: global_prefetch_b8 v0, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
;
; SPREFETCH-LABEL: prefetch_data_sgpr_global:
; SPREFETCH: ; %bb.0: ; %entry
; SPREFETCH-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
; SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_sgpr_global:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
entry:
tail call void @llvm.prefetch.p1(ptr addrspace(1) %ptr, i32 0, i32 0, i32 1)
ret void
}
; Data prefetch of an inreg addrspace(6) pointer. Only the runs with
; +safe-smem-prefetch expect a prefetch; there s1 is set to 0 before
; s_prefetch_data reads s[0:1]. The default gfx1250 runs expect no
; instruction for this address space.
define amdgpu_ps void @prefetch_data_sgpr_constant_32bit(ptr addrspace(6) inreg %ptr) {
; GFX1250-LABEL: prefetch_data_sgpr_constant_32bit:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
;
; SPREFETCH-LABEL: prefetch_data_sgpr_constant_32bit:
; SPREFETCH: ; %bb.0: ; %entry
; SPREFETCH-NEXT: s_mov_b32 s1, 0
; SPREFETCH-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
; SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_sgpr_constant_32bit:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
entry:
tail call void @llvm.prefetch.p6(ptr addrspace(6) %ptr, i32 0, i32 0, i32 1)
ret void
}
; I$ prefetch
; Instruction prefetch (cache type 0) of an inreg addrspace(4) pointer with no
; offset. Only the runs with +safe-smem-prefetch expect s_prefetch_inst; every
; other run expects an empty body.
define amdgpu_ps void @prefetch_inst_sgpr(ptr addrspace(4) inreg %ptr) {
; GFX1250-LABEL: prefetch_inst_sgpr:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
;
; SPREFETCH-LABEL: prefetch_inst_sgpr:
; SPREFETCH: ; %bb.0: ; %entry
; SPREFETCH-NEXT: s_prefetch_inst s[0:1], 0x0, null, 0
; SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_inst_sgpr:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
entry:
tail call void @llvm.prefetch.p4(ptr addrspace(4) %ptr, i32 0, i32 0, i32 0)
ret void
}
; Instruction prefetch of an inreg addrspace(4) pointer advanced by 128 bytes;
; the runs with +safe-smem-prefetch expect the offset as the 0x80 immediate of
; s_prefetch_inst.
define amdgpu_ps void @prefetch_inst_sgpr_offset(ptr addrspace(4) inreg %ptr) {
; GFX1250-LABEL: prefetch_inst_sgpr_offset:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
;
; SPREFETCH-LABEL: prefetch_inst_sgpr_offset:
; SPREFETCH: ; %bb.0: ; %entry
; SPREFETCH-NEXT: s_prefetch_inst s[0:1], 0x80, null, 0
; SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_inst_sgpr_offset:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
entry:
%gep = getelementptr i8, ptr addrspace(4) %ptr, i32 128
tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 0)
ret void
}
; Check large offsets
; Instruction prefetch with a byte offset of 8388607 (0x7fffff); the runs with
; +safe-smem-prefetch expect the whole offset in the s_prefetch_inst immediate.
define amdgpu_ps void @prefetch_inst_sgpr_max_offset(ptr addrspace(4) inreg %ptr) {
; GFX1250-LABEL: prefetch_inst_sgpr_max_offset:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
;
; SPREFETCH-LABEL: prefetch_inst_sgpr_max_offset:
; SPREFETCH: ; %bb.0: ; %entry
; SPREFETCH-NEXT: s_prefetch_inst s[0:1], 0x7fffff, null, 0
; SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_inst_sgpr_max_offset:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
entry:
%gep = getelementptr i8, ptr addrspace(4) %ptr, i32 8388607
tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 0)
ret void
}
; Instruction prefetch with a byte offset of -8388608. The runs with
; +safe-smem-prefetch expect the offset to be added to the base pointer first,
; followed by s_prefetch_inst with a zero immediate; the add sequence differs
; between SelectionDAG and GlobalISel and between gfx1250 and gfx1200.
define amdgpu_ps void @prefetch_inst_sgpr_min_offset(ptr addrspace(4) inreg %ptr) {
; GFX1250-LABEL: prefetch_inst_sgpr_min_offset:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-SDAG-LABEL: prefetch_inst_sgpr_min_offset:
; GFX1250-SPREFETCH-SDAG: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-SDAG-NEXT: s_mov_b64 s[2:3], lit64(0xffffffffff800000)
; GFX1250-SPREFETCH-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-SPREFETCH-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3]
; GFX1250-SPREFETCH-SDAG-NEXT: s_prefetch_inst s[0:1], 0x0, null, 0
; GFX1250-SPREFETCH-SDAG-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_inst_sgpr_min_offset:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
;
; SPREFETCH-SDAG-LABEL: prefetch_inst_sgpr_min_offset:
; SPREFETCH-SDAG: ; %bb.0: ; %entry
; SPREFETCH-SDAG-NEXT: s_mov_b32 s2, 0xff800000
; SPREFETCH-SDAG-NEXT: s_mov_b32 s3, -1
; SPREFETCH-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; SPREFETCH-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3]
; SPREFETCH-SDAG-NEXT: s_prefetch_inst s[0:1], 0x0, null, 0
; SPREFETCH-SDAG-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-GISEL-LABEL: prefetch_inst_sgpr_min_offset:
; GFX1250-SPREFETCH-GISEL: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-GISEL-NEXT: s_add_co_u32 s0, s0, 0xff800000
; GFX1250-SPREFETCH-GISEL-NEXT: s_add_co_ci_u32 s1, s1, -1
; GFX1250-SPREFETCH-GISEL-NEXT: s_prefetch_inst s[0:1], 0x0, null, 0
; GFX1250-SPREFETCH-GISEL-NEXT: s_endpgm
;
; SPREFETCH-GISEL-LABEL: prefetch_inst_sgpr_min_offset:
; SPREFETCH-GISEL: ; %bb.0: ; %entry
; SPREFETCH-GISEL-NEXT: s_add_co_u32 s0, s0, 0xff800000
; SPREFETCH-GISEL-NEXT: s_add_co_ci_u32 s1, s1, -1
; SPREFETCH-GISEL-NEXT: s_prefetch_inst s[0:1], 0x0, null, 0
; SPREFETCH-GISEL-NEXT: s_endpgm
entry:
%gep = getelementptr i8, ptr addrspace(4) %ptr, i32 -8388608
tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 0)
ret void
}
; Instruction prefetch with a byte offset of 8388608 (0x800000), one more than
; the value used by the max-offset test. The runs with +safe-smem-prefetch
; expect the offset to be added to the base pointer, followed by
; s_prefetch_inst with a zero immediate.
define amdgpu_ps void @prefetch_inst_sgpr_too_large_offset(ptr addrspace(4) inreg %ptr) {
; GFX1250-LABEL: prefetch_inst_sgpr_too_large_offset:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-SDAG-LABEL: prefetch_inst_sgpr_too_large_offset:
; GFX1250-SPREFETCH-SDAG: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], 0x800000
; GFX1250-SPREFETCH-SDAG-NEXT: s_prefetch_inst s[0:1], 0x0, null, 0
; GFX1250-SPREFETCH-SDAG-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_inst_sgpr_too_large_offset:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
;
; SPREFETCH-SDAG-LABEL: prefetch_inst_sgpr_too_large_offset:
; SPREFETCH-SDAG: ; %bb.0: ; %entry
; SPREFETCH-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], 0x800000
; SPREFETCH-SDAG-NEXT: s_prefetch_inst s[0:1], 0x0, null, 0
; SPREFETCH-SDAG-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-GISEL-LABEL: prefetch_inst_sgpr_too_large_offset:
; GFX1250-SPREFETCH-GISEL: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-GISEL-NEXT: s_add_co_u32 s0, s0, 0x800000
; GFX1250-SPREFETCH-GISEL-NEXT: s_add_co_ci_u32 s1, s1, 0
; GFX1250-SPREFETCH-GISEL-NEXT: s_prefetch_inst s[0:1], 0x0, null, 0
; GFX1250-SPREFETCH-GISEL-NEXT: s_endpgm
;
; SPREFETCH-GISEL-LABEL: prefetch_inst_sgpr_too_large_offset:
; SPREFETCH-GISEL: ; %bb.0: ; %entry
; SPREFETCH-GISEL-NEXT: s_add_co_u32 s0, s0, 0x800000
; SPREFETCH-GISEL-NEXT: s_add_co_ci_u32 s1, s1, 0
; SPREFETCH-GISEL-NEXT: s_prefetch_inst s[0:1], 0x0, null, 0
; SPREFETCH-GISEL-NEXT: s_endpgm
entry:
%gep = getelementptr i8, ptr addrspace(4) %ptr, i32 8388608
tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 0)
ret void
}
; Check cache locality
; Data prefetch with the locality operand set to 1; the gfx1250 runs expect
; flat_prefetch_b8 to carry scope:SCOPE_DEV.
define amdgpu_ps void @prefetch_data_vgpr_flat_dev(ptr %ptr) {
; GFX1250-LABEL: prefetch_data_vgpr_flat_dev:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: flat_prefetch_b8 v[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-LABEL: prefetch_data_vgpr_flat_dev:
; GFX1250-SPREFETCH: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-NEXT: flat_prefetch_b8 v[0:1] scope:SCOPE_DEV
; GFX1250-SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_vgpr_flat_dev:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
;
; GFX12-SPREFETCH-LABEL: prefetch_data_vgpr_flat_dev:
; GFX12-SPREFETCH: ; %bb.0: ; %entry
; GFX12-SPREFETCH-NEXT: s_endpgm
entry:
tail call void @llvm.prefetch.pf(ptr %ptr, i32 0, i32 1, i32 1)
ret void
}
; Data prefetch with the locality operand set to 2; the gfx1250 runs expect
; flat_prefetch_b8 to carry scope:SCOPE_SE.
define amdgpu_ps void @prefetch_data_vgpr_flat_se(ptr %ptr) {
; GFX1250-LABEL: prefetch_data_vgpr_flat_se:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: flat_prefetch_b8 v[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-LABEL: prefetch_data_vgpr_flat_se:
; GFX1250-SPREFETCH: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-NEXT: flat_prefetch_b8 v[0:1] scope:SCOPE_SE
; GFX1250-SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_vgpr_flat_se:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
;
; GFX12-SPREFETCH-LABEL: prefetch_data_vgpr_flat_se:
; GFX12-SPREFETCH: ; %bb.0: ; %entry
; GFX12-SPREFETCH-NEXT: s_endpgm
entry:
tail call void @llvm.prefetch.pf(ptr %ptr, i32 0, i32 2, i32 1)
ret void
}
; Data prefetch with the locality operand set to 3. The gfx1250 runs with
; +safe-cu-prefetch expect flat_prefetch_b8 without a scope operand; the other
; gfx1250 runs expect the same scope:SCOPE_SE as for locality 2.
define amdgpu_ps void @prefetch_data_vgpr_flat_cu(ptr %ptr) {
; GL2-ONLY-LABEL: prefetch_data_vgpr_flat_cu:
; GL2-ONLY: ; %bb.0: ; %entry
; GL2-ONLY-NEXT: flat_prefetch_b8 v[0:1] scope:SCOPE_SE
; GL2-ONLY-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-LABEL: prefetch_data_vgpr_flat_cu:
; GFX1250-SPREFETCH: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-NEXT: flat_prefetch_b8 v[0:1] scope:SCOPE_SE
; GFX1250-SPREFETCH-NEXT: s_endpgm
;
; SAFE-CU-LABEL: prefetch_data_vgpr_flat_cu:
; SAFE-CU: ; %bb.0: ; %entry
; SAFE-CU-NEXT: flat_prefetch_b8 v[0:1]
; SAFE-CU-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_vgpr_flat_cu:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
;
; GFX12-SPREFETCH-LABEL: prefetch_data_vgpr_flat_cu:
; GFX12-SPREFETCH: ; %bb.0: ; %entry
; GFX12-SPREFETCH-NEXT: s_endpgm
entry:
tail call void @llvm.prefetch.pf(ptr %ptr, i32 0, i32 3, i32 1)
ret void
}
; flat offset
; Data prefetch through a non-inreg default-address-space pointer advanced by
; 128 floats; the gfx1250 runs expect the offset as the immediate offset:512
; on flat_prefetch_b8.
define amdgpu_ps void @prefetch_data_vgpr_flat_offset(ptr %ptr) {
; GFX1250-LABEL: prefetch_data_vgpr_flat_offset:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: flat_prefetch_b8 v[0:1] offset:512 scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-LABEL: prefetch_data_vgpr_flat_offset:
; GFX1250-SPREFETCH: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-NEXT: flat_prefetch_b8 v[0:1] offset:512 scope:SCOPE_SYS
; GFX1250-SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_vgpr_flat_offset:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
;
; GFX12-SPREFETCH-LABEL: prefetch_data_vgpr_flat_offset:
; GFX12-SPREFETCH: ; %bb.0: ; %entry
; GFX12-SPREFETCH-NEXT: s_endpgm
entry:
%gep = getelementptr float, ptr %ptr, i32 128
tail call void @llvm.prefetch.pf(ptr %gep, i32 0, i32 0, i32 1)
ret void
}
; Data prefetch through a non-inreg addrspace(1) pointer advanced by 128
; floats; the gfx1250 runs expect the offset as the immediate offset:512 on
; global_prefetch_b8.
define amdgpu_ps void @prefetch_data_vgpr_global_offset(ptr addrspace(1) %ptr) {
; GFX1250-LABEL: prefetch_data_vgpr_global_offset:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_prefetch_b8 v[0:1], off offset:512 scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-LABEL: prefetch_data_vgpr_global_offset:
; GFX1250-SPREFETCH: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-NEXT: global_prefetch_b8 v[0:1], off offset:512 scope:SCOPE_SYS
; GFX1250-SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_vgpr_global_offset:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
;
; GFX12-SPREFETCH-LABEL: prefetch_data_vgpr_global_offset:
; GFX12-SPREFETCH: ; %bb.0: ; %entry
; GFX12-SPREFETCH-NEXT: s_endpgm
entry:
%gep = getelementptr float, ptr addrspace(1) %ptr, i32 128
tail call void @llvm.prefetch.p1(ptr addrspace(1) %gep, i32 0, i32 0, i32 1)
ret void
}
; Data prefetch of an inreg addrspace(1) base plus a non-inreg i32 byte offset;
; the gfx1250 runs expect global_prefetch_b8 with the base in s[0:1] and the
; offset in v0.
define amdgpu_ps void @prefetch_data_vgpr_global_saddr(ptr addrspace(1) inreg %ptr, i32 %voffset) {
; GFX1250-LABEL: prefetch_data_vgpr_global_saddr:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_prefetch_b8 v0, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-LABEL: prefetch_data_vgpr_global_saddr:
; GFX1250-SPREFETCH: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-NEXT: global_prefetch_b8 v0, s[0:1] scope:SCOPE_SYS
; GFX1250-SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_vgpr_global_saddr:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
;
; GFX12-SPREFETCH-LABEL: prefetch_data_vgpr_global_saddr:
; GFX12-SPREFETCH: ; %bb.0: ; %entry
; GFX12-SPREFETCH-NEXT: s_endpgm
entry:
%gep = getelementptr i8, ptr addrspace(1) %ptr, i32 %voffset
tail call void @llvm.prefetch.p1(ptr addrspace(1) %gep, i32 0, i32 0, i32 1)
ret void
}
; Data prefetch of an inreg addrspace(1) base plus a non-inreg i32 byte offset
; plus a constant 128 bytes; the gfx1250 runs expect the base in s[0:1], the
; variable offset in v0 and the constant as the immediate offset:128.
define amdgpu_ps void @prefetch_data_vgpr_global_saddr_offset(ptr addrspace(1) inreg %ptr, i32 %voffset) {
; GFX1250-LABEL: prefetch_data_vgpr_global_saddr_offset:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_prefetch_b8 v0, s[0:1] offset:128 scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-LABEL: prefetch_data_vgpr_global_saddr_offset:
; GFX1250-SPREFETCH: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-NEXT: global_prefetch_b8 v0, s[0:1] offset:128 scope:SCOPE_SYS
; GFX1250-SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_vgpr_global_saddr_offset:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
;
; GFX12-SPREFETCH-LABEL: prefetch_data_vgpr_global_saddr_offset:
; GFX12-SPREFETCH: ; %bb.0: ; %entry
; GFX12-SPREFETCH-NEXT: s_endpgm
entry:
%gep1 = getelementptr i8, ptr addrspace(1) %ptr, i32 %voffset
%gep2 = getelementptr i8, ptr addrspace(1) %gep1, i32 128
tail call void @llvm.prefetch.p1(ptr addrspace(1) %gep2, i32 0, i32 0, i32 1)
ret void
}
; Cannot prefetch I$ with flat or global instructions.
; Instruction prefetch (cache type 0) through a non-inreg addrspace(1) pointer;
; every run expects the call to produce no instruction.
define amdgpu_ps void @prefetch_inst_vgpr_global(ptr addrspace(1) %ptr) {
; GCN-LABEL: prefetch_inst_vgpr_global:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_endpgm
entry:
tail call void @llvm.prefetch.p1(ptr addrspace(1) %ptr, i32 0, i32 0, i32 0)
ret void
}
; Instruction prefetch (cache type 0) through a non-inreg default-address-space
; pointer; every run expects the call to produce no instruction.
define amdgpu_ps void @prefetch_inst_vgpr_flat(ptr %ptr) {
; GCN-LABEL: prefetch_inst_vgpr_flat:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_endpgm
entry:
tail call void @llvm.prefetch.pf(ptr %ptr, i32 0, i32 0, i32 0)
ret void
}
; Force vector prefetch for uniform address with rw = 1 argument.
; Data prefetch of an inreg default-address-space pointer with rw set to 1.
; Both gfx1250 configurations, including +safe-smem-prefetch, expect the vector
; flat_prefetch_b8 form; gfx1200 with +safe-smem-prefetch still expects
; s_prefetch_data.
define amdgpu_ps void @prefetch_data_sgpr_flat_force_vector(ptr inreg %ptr) {
; GFX1250-LABEL: prefetch_data_sgpr_flat_force_vector:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: flat_prefetch_b8 v0, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-LABEL: prefetch_data_sgpr_flat_force_vector:
; GFX1250-SPREFETCH: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-SPREFETCH-NEXT: flat_prefetch_b8 v0, s[0:1] scope:SCOPE_SYS
; GFX1250-SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_sgpr_flat_force_vector:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
;
; GFX12-SPREFETCH-LABEL: prefetch_data_sgpr_flat_force_vector:
; GFX12-SPREFETCH: ; %bb.0: ; %entry
; GFX12-SPREFETCH-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
; GFX12-SPREFETCH-NEXT: s_endpgm
entry:
tail call void @llvm.prefetch.pf(ptr %ptr, i32 1, i32 0, i32 1)
ret void
}
; Data prefetch of an inreg addrspace(1) pointer with rw set to 1. Both gfx1250
; configurations, including +safe-smem-prefetch, expect the vector
; global_prefetch_b8 form; gfx1200 with +safe-smem-prefetch still expects
; s_prefetch_data.
define amdgpu_ps void @prefetch_data_sgpr_global_force_vector(ptr addrspace(1) inreg %ptr) {
; GFX1250-LABEL: prefetch_data_sgpr_global_force_vector:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: global_prefetch_b8 v0, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-LABEL: prefetch_data_sgpr_global_force_vector:
; GFX1250-SPREFETCH: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-SPREFETCH-NEXT: global_prefetch_b8 v0, s[0:1] scope:SCOPE_SYS
; GFX1250-SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_sgpr_global_force_vector:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
;
; GFX12-SPREFETCH-LABEL: prefetch_data_sgpr_global_force_vector:
; GFX12-SPREFETCH: ; %bb.0: ; %entry
; GFX12-SPREFETCH-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
; GFX12-SPREFETCH-NEXT: s_endpgm
entry:
tail call void @llvm.prefetch.p1(ptr addrspace(1) %ptr, i32 1, i32 0, i32 1)
ret void
}
; Data prefetch with rw set to 1 of an inreg addrspace(1) pointer advanced by
; 1024 bytes. The gfx1250 runs expect global_prefetch_b8 with offset:1024;
; gfx1200 with +safe-smem-prefetch expects s_prefetch_data with the 0x400
; immediate.
define amdgpu_ps void @prefetch_data_sgpr_global_saddr_force_vector(ptr addrspace(1) inreg %ptr) {
; GFX1250-LABEL: prefetch_data_sgpr_global_saddr_force_vector:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: global_prefetch_b8 v0, s[0:1] offset:1024 scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-LABEL: prefetch_data_sgpr_global_saddr_force_vector:
; GFX1250-SPREFETCH: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-SPREFETCH-NEXT: global_prefetch_b8 v0, s[0:1] offset:1024 scope:SCOPE_SYS
; GFX1250-SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_sgpr_global_saddr_force_vector:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
;
; GFX12-SPREFETCH-LABEL: prefetch_data_sgpr_global_saddr_force_vector:
; GFX12-SPREFETCH: ; %bb.0: ; %entry
; GFX12-SPREFETCH-NEXT: s_prefetch_data s[0:1], 0x400, null, 0
; GFX12-SPREFETCH-NEXT: s_endpgm
entry:
%gep = getelementptr i8, ptr addrspace(1) %ptr, i32 1024
tail call void @llvm.prefetch.p1(ptr addrspace(1) %gep, i32 1, i32 0, i32 1)
ret void
}
; Declarations of the llvm.prefetch overloads called above, one per pointer
; address space (default, 1, 3, 4, 5 and 6). Judging by how the tests vary
; them, the three i32 operands are rw, locality and cache type, in that order.
; NOTE(review): the default-address-space overload is spelled with a .pf suffix
; rather than a numbered one like the others - presumably this relies on the IR
; reader fixing up the intrinsic name; confirm before renaming.
declare void @llvm.prefetch.pf(ptr nocapture readonly, i32, i32, i32)
declare void @llvm.prefetch.p1(ptr addrspace(1) nocapture readonly, i32, i32, i32)
declare void @llvm.prefetch.p3(ptr addrspace(3) nocapture readonly, i32, i32, i32)
declare void @llvm.prefetch.p4(ptr addrspace(4) nocapture readonly, i32, i32, i32)
declare void @llvm.prefetch.p5(ptr addrspace(5) nocapture readonly, i32, i32, i32)
declare void @llvm.prefetch.p6(ptr addrspace(6) nocapture readonly, i32, i32, i32)