llvm-project/llvm/test/CodeGen/AMDGPU/llvm.prefetch.ll
Ivan Kosarev 9e55d81c68
[AMDGPU][AsmParser] Introduce MC representation for lit() and lit64(). (#160316)
And rework the lit64() support to use it.

The rules for when to add lit64() can be simplified and
improved. In this change, however, we just follow the existing
conventions on the assembler and disassembler sides.

In codegen we do not (and normally should not need to) add explicit
lit() and lit64() modifiers, so the codegen tests lose them. The change
is an NFCI otherwise.

Simplifies printing operands.
2025-09-24 12:35:50 +01:00

787 lines
30 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GCN,GFX1250,GL2-ONLY %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 -mattr=+safe-smem-prefetch < %s | FileCheck --check-prefixes=GCN,SPREFETCH,GFX1250-SPREFETCH,GFX1250-SPREFETCH-SDAG %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 -mattr=+safe-cu-prefetch < %s | FileCheck --check-prefixes=GCN,GFX1250,SAFE-CU %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GCN,NOSPREFETCH %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=+safe-smem-prefetch < %s | FileCheck --check-prefixes=GCN,SPREFETCH,GFX12-SPREFETCH,SPREFETCH-SDAG %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GCN,NOSPREFETCH %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GCN,GFX1250,GL2-ONLY %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 -mattr=+safe-smem-prefetch < %s | FileCheck --check-prefixes=GCN,SPREFETCH,GFX1250-SPREFETCH,GFX1250-SPREFETCH-GISEL %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 -mattr=+safe-cu-prefetch < %s | FileCheck --check-prefixes=GCN,GFX1250,SAFE-CU %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GCN,NOSPREFETCH %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -mattr=+safe-smem-prefetch < %s | FileCheck --check-prefixes=GCN,SPREFETCH,GFX12-SPREFETCH,SPREFETCH-GISEL %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GCN,NOSPREFETCH %s
; Scalar data prefetch
; Data prefetch (final operand 1) through an inreg addrspace(4) pointer with
; no offset. gfx1250 runs without safe-smem-prefetch expect global_prefetch_b8
; with a zero voffset, runs with safe-smem-prefetch expect s_prefetch_data, and
; the remaining runs expect no prefetch instruction at all.
define amdgpu_ps void @prefetch_data_sgpr(ptr addrspace(4) inreg %ptr) {
; GFX1250-LABEL: prefetch_data_sgpr:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: global_prefetch_b8 v0, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
;
; SPREFETCH-LABEL: prefetch_data_sgpr:
; SPREFETCH: ; %bb.0: ; %entry
; SPREFETCH-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
; SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_sgpr:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
entry:
tail call void @llvm.prefetch.p4(ptr addrspace(4) %ptr, i32 0, i32 0, i32 1)
ret void
}
; Data prefetch through a GEP of 128 floats (512 bytes) from an inreg
; addrspace(4) pointer. The checks expect the displacement to be folded into
; the prefetch instruction itself (offset:512 or 0x200).
define amdgpu_ps void @prefetch_data_sgpr_offset(ptr addrspace(4) inreg %ptr) {
; GFX1250-LABEL: prefetch_data_sgpr_offset:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: global_prefetch_b8 v0, s[0:1] offset:512 scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
;
; SPREFETCH-LABEL: prefetch_data_sgpr_offset:
; SPREFETCH: ; %bb.0: ; %entry
; SPREFETCH-NEXT: s_prefetch_data s[0:1], 0x200, null, 0
; SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_sgpr_offset:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
entry:
%gep = getelementptr float, ptr addrspace(4) %ptr, i32 128
tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 1)
ret void
}
; Check large offsets
; Data prefetch at a byte displacement of 8388607 (0x7fffff). The checks expect
; this value to still be encoded directly as the instruction's immediate offset
; for both the global_prefetch_b8 and s_prefetch_data forms.
define amdgpu_ps void @prefetch_data_sgpr_max_offset(ptr addrspace(4) inreg %ptr) {
; GFX1250-LABEL: prefetch_data_sgpr_max_offset:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: global_prefetch_b8 v0, s[0:1] offset:8388607 scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
;
; SPREFETCH-LABEL: prefetch_data_sgpr_max_offset:
; SPREFETCH: ; %bb.0: ; %entry
; SPREFETCH-NEXT: s_prefetch_data s[0:1], 0x7fffff, null, 0
; SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_sgpr_max_offset:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
entry:
%gep = getelementptr i8, ptr addrspace(4) %ptr, i32 8388607
tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 1)
ret void
}
; Data prefetch at a byte displacement of -8388608. global_prefetch_b8 is
; expected to take it as an immediate (offset:-8388608), whereas every
; s_prefetch_data variant is expected to add it to the base pointer first and
; then prefetch with offset 0x0. The add sequence differs between SelectionDAG
; and GlobalISel and between gfx1250 and gfx1200, hence the four separate
; check groups for the safe-smem-prefetch runs.
define amdgpu_ps void @prefetch_data_sgpr_min_offset(ptr addrspace(4) inreg %ptr) {
; GFX1250-LABEL: prefetch_data_sgpr_min_offset:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: global_prefetch_b8 v0, s[0:1] offset:-8388608 scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-SDAG-LABEL: prefetch_data_sgpr_min_offset:
; GFX1250-SPREFETCH-SDAG: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-SDAG-NEXT: s_mov_b64 s[2:3], 0xffffffffff800000
; GFX1250-SPREFETCH-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-SPREFETCH-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3]
; GFX1250-SPREFETCH-SDAG-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
; GFX1250-SPREFETCH-SDAG-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_sgpr_min_offset:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
;
; SPREFETCH-SDAG-LABEL: prefetch_data_sgpr_min_offset:
; SPREFETCH-SDAG: ; %bb.0: ; %entry
; SPREFETCH-SDAG-NEXT: s_mov_b32 s2, 0xff800000
; SPREFETCH-SDAG-NEXT: s_mov_b32 s3, -1
; SPREFETCH-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; SPREFETCH-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3]
; SPREFETCH-SDAG-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
; SPREFETCH-SDAG-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-GISEL-LABEL: prefetch_data_sgpr_min_offset:
; GFX1250-SPREFETCH-GISEL: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-GISEL-NEXT: s_add_co_u32 s0, s0, 0xff800000
; GFX1250-SPREFETCH-GISEL-NEXT: s_add_co_ci_u32 s1, s1, -1
; GFX1250-SPREFETCH-GISEL-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
; GFX1250-SPREFETCH-GISEL-NEXT: s_endpgm
;
; SPREFETCH-GISEL-LABEL: prefetch_data_sgpr_min_offset:
; SPREFETCH-GISEL: ; %bb.0: ; %entry
; SPREFETCH-GISEL-NEXT: s_add_co_u32 s0, s0, 0xff800000
; SPREFETCH-GISEL-NEXT: s_add_co_ci_u32 s1, s1, -1
; SPREFETCH-GISEL-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
; SPREFETCH-GISEL-NEXT: s_endpgm
entry:
%gep = getelementptr i8, ptr addrspace(4) %ptr, i32 -8388608
tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 1)
ret void
}
; Data prefetch at a byte displacement of 8388608 (0x800000), one more than the
; value used by the max-offset test. The checks expect the displacement to be
; materialised separately (in the voffset register for global_prefetch_b8, or
; via a scalar add for s_prefetch_data) rather than used as an immediate
; offset.
define amdgpu_ps void @prefetch_data_sgpr_too_large_offset(ptr addrspace(4) inreg %ptr) {
; GFX1250-LABEL: prefetch_data_sgpr_too_large_offset:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v0, 0x800000
; GFX1250-NEXT: global_prefetch_b8 v0, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-SDAG-LABEL: prefetch_data_sgpr_too_large_offset:
; GFX1250-SPREFETCH-SDAG: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], 0x800000
; GFX1250-SPREFETCH-SDAG-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
; GFX1250-SPREFETCH-SDAG-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_sgpr_too_large_offset:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
;
; SPREFETCH-SDAG-LABEL: prefetch_data_sgpr_too_large_offset:
; SPREFETCH-SDAG: ; %bb.0: ; %entry
; SPREFETCH-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], 0x800000
; SPREFETCH-SDAG-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
; SPREFETCH-SDAG-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-GISEL-LABEL: prefetch_data_sgpr_too_large_offset:
; GFX1250-SPREFETCH-GISEL: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-GISEL-NEXT: s_add_co_u32 s0, s0, 0x800000
; GFX1250-SPREFETCH-GISEL-NEXT: s_add_co_ci_u32 s1, s1, 0
; GFX1250-SPREFETCH-GISEL-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
; GFX1250-SPREFETCH-GISEL-NEXT: s_endpgm
;
; SPREFETCH-GISEL-LABEL: prefetch_data_sgpr_too_large_offset:
; SPREFETCH-GISEL: ; %bb.0: ; %entry
; SPREFETCH-GISEL-NEXT: s_add_co_u32 s0, s0, 0x800000
; SPREFETCH-GISEL-NEXT: s_add_co_ci_u32 s1, s1, 0
; SPREFETCH-GISEL-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
; SPREFETCH-GISEL-NEXT: s_endpgm
entry:
%gep = getelementptr i8, ptr addrspace(4) %ptr, i32 8388608
tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 1)
ret void
}
; Check divergent address
; Data prefetch through a non-inreg addrspace(1) pointer. All gfx1250 runs,
; with or without safe-smem-prefetch, expect global_prefetch_b8 on v[0:1];
; every other run expects no prefetch instruction.
define amdgpu_ps void @prefetch_data_vgpr_global(ptr addrspace(1) %ptr) {
; GFX1250-LABEL: prefetch_data_vgpr_global:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_prefetch_b8 v[0:1], off scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-LABEL: prefetch_data_vgpr_global:
; GFX1250-SPREFETCH: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-NEXT: global_prefetch_b8 v[0:1], off scope:SCOPE_SYS
; GFX1250-SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_vgpr_global:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
;
; GFX12-SPREFETCH-LABEL: prefetch_data_vgpr_global:
; GFX12-SPREFETCH: ; %bb.0: ; %entry
; GFX12-SPREFETCH-NEXT: s_endpgm
entry:
tail call void @llvm.prefetch.p1(ptr addrspace(1) %ptr, i32 0, i32 0, i32 1)
ret void
}
; Data prefetch through a non-inreg generic (default address space) pointer.
; All gfx1250 runs expect flat_prefetch_b8 on v[0:1]; every other run expects
; no prefetch instruction.
define amdgpu_ps void @prefetch_data_vgpr_flat(ptr %ptr) {
; GFX1250-LABEL: prefetch_data_vgpr_flat:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: flat_prefetch_b8 v[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-LABEL: prefetch_data_vgpr_flat:
; GFX1250-SPREFETCH: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-NEXT: flat_prefetch_b8 v[0:1] scope:SCOPE_SYS
; GFX1250-SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_vgpr_flat:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
;
; GFX12-SPREFETCH-LABEL: prefetch_data_vgpr_flat:
; GFX12-SPREFETCH: ; %bb.0: ; %entry
; GFX12-SPREFETCH-NEXT: s_endpgm
entry:
tail call void @llvm.prefetch.pf(ptr %ptr, i32 0, i32 0, i32 1)
ret void
}
; Data prefetch through an inreg addrspace(1) base plus a non-inreg i32 offset.
; All gfx1250 runs expect the base-in-SGPR form of global_prefetch_b8 (v0 as
; the offset register, s[0:1] as the base); every other run expects no prefetch
; instruction.
define amdgpu_ps void @prefetch_data_sgpr_vgpr_offset_global(ptr addrspace(1) inreg %ptr, i32 %offset) {
; GFX1250-LABEL: prefetch_data_sgpr_vgpr_offset_global:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_prefetch_b8 v0, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-LABEL: prefetch_data_sgpr_vgpr_offset_global:
; GFX1250-SPREFETCH: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-NEXT: global_prefetch_b8 v0, s[0:1] scope:SCOPE_SYS
; GFX1250-SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_sgpr_vgpr_offset_global:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
;
; GFX12-SPREFETCH-LABEL: prefetch_data_sgpr_vgpr_offset_global:
; GFX12-SPREFETCH: ; %bb.0: ; %entry
; GFX12-SPREFETCH-NEXT: s_endpgm
entry:
%gep = getelementptr i8, ptr addrspace(1) %ptr, i32 %offset
tail call void @llvm.prefetch.p1(ptr addrspace(1) %gep, i32 0, i32 0, i32 1)
ret void
}
; Data prefetch through an inreg generic base plus a non-inreg i32 offset and a
; further constant 128 bytes. All gfx1250 runs expect flat_prefetch_b8 with v0
; as the offset register, s[0:1] as the base and the constant folded in as
; offset:128; every other run expects no prefetch instruction.
define amdgpu_ps void @prefetch_data_sgpr_vgpr_offset_flat(ptr inreg %ptr, i32 %offset) {
; GFX1250-LABEL: prefetch_data_sgpr_vgpr_offset_flat:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: flat_prefetch_b8 v0, s[0:1] offset:128 scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-LABEL: prefetch_data_sgpr_vgpr_offset_flat:
; GFX1250-SPREFETCH: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-NEXT: flat_prefetch_b8 v0, s[0:1] offset:128 scope:SCOPE_SYS
; GFX1250-SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_sgpr_vgpr_offset_flat:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
;
; GFX12-SPREFETCH-LABEL: prefetch_data_sgpr_vgpr_offset_flat:
; GFX12-SPREFETCH: ; %bb.0: ; %entry
; GFX12-SPREFETCH-NEXT: s_endpgm
entry:
%gep1 = getelementptr i8, ptr %ptr, i32 %offset
%gep2 = getelementptr i8, ptr %gep1, i32 128
tail call void @llvm.prefetch.pf(ptr %gep2, i32 0, i32 0, i32 1)
ret void
}
; Check LDS and scratch: these address spaces cannot be prefetched.
; Data prefetch through an addrspace(3) pointer. Every run configuration shares
; one check group and expects the function to contain only s_endpgm.
define amdgpu_ps void @prefetch_data_lds(ptr addrspace(3) inreg %ptr) {
; GCN-LABEL: prefetch_data_lds:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_endpgm
entry:
tail call void @llvm.prefetch.p3(ptr addrspace(3) %ptr, i32 0, i32 0, i32 1)
ret void
}
; Data prefetch through an addrspace(5) pointer. Every run configuration shares
; one check group and expects the function to contain only s_endpgm.
define amdgpu_ps void @prefetch_data_scratch(ptr addrspace(5) inreg %ptr) {
; GCN-LABEL: prefetch_data_scratch:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_endpgm
entry:
tail call void @llvm.prefetch.p5(ptr addrspace(5) %ptr, i32 0, i32 0, i32 1)
ret void
}
; Check supported address spaces
; Data prefetch through an inreg generic pointer. gfx1250 runs without
; safe-smem-prefetch expect flat_prefetch_b8 with a zero offset register, runs
; with safe-smem-prefetch expect s_prefetch_data, and the remaining runs expect
; no prefetch instruction.
define amdgpu_ps void @prefetch_data_sgpr_flat(ptr inreg %ptr) {
; GFX1250-LABEL: prefetch_data_sgpr_flat:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: flat_prefetch_b8 v0, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
;
; SPREFETCH-LABEL: prefetch_data_sgpr_flat:
; SPREFETCH: ; %bb.0: ; %entry
; SPREFETCH-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
; SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_sgpr_flat:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
entry:
tail call void @llvm.prefetch.pf(ptr %ptr, i32 0, i32 0, i32 1)
ret void
}
; Data prefetch through an inreg addrspace(1) pointer. gfx1250 runs without
; safe-smem-prefetch expect global_prefetch_b8 with a zero offset register,
; runs with safe-smem-prefetch expect s_prefetch_data, and the remaining runs
; expect no prefetch instruction.
define amdgpu_ps void @prefetch_data_sgpr_global(ptr addrspace(1) inreg %ptr) {
; GFX1250-LABEL: prefetch_data_sgpr_global:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: global_prefetch_b8 v0, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
;
; SPREFETCH-LABEL: prefetch_data_sgpr_global:
; SPREFETCH: ; %bb.0: ; %entry
; SPREFETCH-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
; SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_sgpr_global:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
entry:
tail call void @llvm.prefetch.p1(ptr addrspace(1) %ptr, i32 0, i32 0, i32 1)
ret void
}
; Data prefetch through an inreg addrspace(6) pointer. Runs with
; safe-smem-prefetch expect s1 to be set to 0 and then s_prefetch_data issued
; on the s[0:1] pair; all other runs, including gfx1250 without that feature,
; expect no prefetch instruction.
define amdgpu_ps void @prefetch_data_sgpr_constant_32bit(ptr addrspace(6) inreg %ptr) {
; GFX1250-LABEL: prefetch_data_sgpr_constant_32bit:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
;
; SPREFETCH-LABEL: prefetch_data_sgpr_constant_32bit:
; SPREFETCH: ; %bb.0: ; %entry
; SPREFETCH-NEXT: s_mov_b32 s1, 0
; SPREFETCH-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
; SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_sgpr_constant_32bit:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
entry:
tail call void @llvm.prefetch.p6(ptr addrspace(6) %ptr, i32 0, i32 0, i32 1)
ret void
}
; I$ prefetch
; Instruction prefetch (final operand 0) through an inreg addrspace(4) pointer
; with no offset. Only runs with safe-smem-prefetch expect an instruction
; (s_prefetch_inst); all other runs expect just s_endpgm.
define amdgpu_ps void @prefetch_inst_sgpr(ptr addrspace(4) inreg %ptr) {
; GFX1250-LABEL: prefetch_inst_sgpr:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
;
; SPREFETCH-LABEL: prefetch_inst_sgpr:
; SPREFETCH: ; %bb.0: ; %entry
; SPREFETCH-NEXT: s_prefetch_inst s[0:1], 0x0, null, 0
; SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_inst_sgpr:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
entry:
tail call void @llvm.prefetch.p4(ptr addrspace(4) %ptr, i32 0, i32 0, i32 0)
ret void
}
; Instruction prefetch at a 128-byte displacement. Runs with safe-smem-prefetch
; expect the displacement folded into s_prefetch_inst as 0x80; all other runs
; expect just s_endpgm.
define amdgpu_ps void @prefetch_inst_sgpr_offset(ptr addrspace(4) inreg %ptr) {
; GFX1250-LABEL: prefetch_inst_sgpr_offset:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
;
; SPREFETCH-LABEL: prefetch_inst_sgpr_offset:
; SPREFETCH: ; %bb.0: ; %entry
; SPREFETCH-NEXT: s_prefetch_inst s[0:1], 0x80, null, 0
; SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_inst_sgpr_offset:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
entry:
%gep = getelementptr i8, ptr addrspace(4) %ptr, i32 128
tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 0)
ret void
}
; Check large offsets
; Instruction prefetch at a byte displacement of 8388607 (0x7fffff). Runs with
; safe-smem-prefetch expect it encoded directly as the s_prefetch_inst
; immediate; all other runs expect just s_endpgm.
define amdgpu_ps void @prefetch_inst_sgpr_max_offset(ptr addrspace(4) inreg %ptr) {
; GFX1250-LABEL: prefetch_inst_sgpr_max_offset:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
;
; SPREFETCH-LABEL: prefetch_inst_sgpr_max_offset:
; SPREFETCH: ; %bb.0: ; %entry
; SPREFETCH-NEXT: s_prefetch_inst s[0:1], 0x7fffff, null, 0
; SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_inst_sgpr_max_offset:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
entry:
%gep = getelementptr i8, ptr addrspace(4) %ptr, i32 8388607
tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 0)
ret void
}
; Instruction prefetch at a byte displacement of -8388608. Runs with
; safe-smem-prefetch expect the displacement to be added to the base pointer
; first and s_prefetch_inst issued with offset 0x0; the add sequence differs by
; instruction selector and by target, hence the four check groups. All other
; runs expect just s_endpgm.
define amdgpu_ps void @prefetch_inst_sgpr_min_offset(ptr addrspace(4) inreg %ptr) {
; GFX1250-LABEL: prefetch_inst_sgpr_min_offset:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-SDAG-LABEL: prefetch_inst_sgpr_min_offset:
; GFX1250-SPREFETCH-SDAG: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-SDAG-NEXT: s_mov_b64 s[2:3], 0xffffffffff800000
; GFX1250-SPREFETCH-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-SPREFETCH-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3]
; GFX1250-SPREFETCH-SDAG-NEXT: s_prefetch_inst s[0:1], 0x0, null, 0
; GFX1250-SPREFETCH-SDAG-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_inst_sgpr_min_offset:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
;
; SPREFETCH-SDAG-LABEL: prefetch_inst_sgpr_min_offset:
; SPREFETCH-SDAG: ; %bb.0: ; %entry
; SPREFETCH-SDAG-NEXT: s_mov_b32 s2, 0xff800000
; SPREFETCH-SDAG-NEXT: s_mov_b32 s3, -1
; SPREFETCH-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; SPREFETCH-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3]
; SPREFETCH-SDAG-NEXT: s_prefetch_inst s[0:1], 0x0, null, 0
; SPREFETCH-SDAG-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-GISEL-LABEL: prefetch_inst_sgpr_min_offset:
; GFX1250-SPREFETCH-GISEL: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-GISEL-NEXT: s_add_co_u32 s0, s0, 0xff800000
; GFX1250-SPREFETCH-GISEL-NEXT: s_add_co_ci_u32 s1, s1, -1
; GFX1250-SPREFETCH-GISEL-NEXT: s_prefetch_inst s[0:1], 0x0, null, 0
; GFX1250-SPREFETCH-GISEL-NEXT: s_endpgm
;
; SPREFETCH-GISEL-LABEL: prefetch_inst_sgpr_min_offset:
; SPREFETCH-GISEL: ; %bb.0: ; %entry
; SPREFETCH-GISEL-NEXT: s_add_co_u32 s0, s0, 0xff800000
; SPREFETCH-GISEL-NEXT: s_add_co_ci_u32 s1, s1, -1
; SPREFETCH-GISEL-NEXT: s_prefetch_inst s[0:1], 0x0, null, 0
; SPREFETCH-GISEL-NEXT: s_endpgm
entry:
%gep = getelementptr i8, ptr addrspace(4) %ptr, i32 -8388608
tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 0)
ret void
}
; Instruction prefetch at a byte displacement of 8388608 (0x800000), one more
; than the value used by the max-offset test. Runs with safe-smem-prefetch
; expect a scalar add of the displacement followed by s_prefetch_inst with
; offset 0x0; all other runs expect just s_endpgm.
define amdgpu_ps void @prefetch_inst_sgpr_too_large_offset(ptr addrspace(4) inreg %ptr) {
; GFX1250-LABEL: prefetch_inst_sgpr_too_large_offset:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-SDAG-LABEL: prefetch_inst_sgpr_too_large_offset:
; GFX1250-SPREFETCH-SDAG: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], 0x800000
; GFX1250-SPREFETCH-SDAG-NEXT: s_prefetch_inst s[0:1], 0x0, null, 0
; GFX1250-SPREFETCH-SDAG-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_inst_sgpr_too_large_offset:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
;
; SPREFETCH-SDAG-LABEL: prefetch_inst_sgpr_too_large_offset:
; SPREFETCH-SDAG: ; %bb.0: ; %entry
; SPREFETCH-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], 0x800000
; SPREFETCH-SDAG-NEXT: s_prefetch_inst s[0:1], 0x0, null, 0
; SPREFETCH-SDAG-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-GISEL-LABEL: prefetch_inst_sgpr_too_large_offset:
; GFX1250-SPREFETCH-GISEL: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-GISEL-NEXT: s_add_co_u32 s0, s0, 0x800000
; GFX1250-SPREFETCH-GISEL-NEXT: s_add_co_ci_u32 s1, s1, 0
; GFX1250-SPREFETCH-GISEL-NEXT: s_prefetch_inst s[0:1], 0x0, null, 0
; GFX1250-SPREFETCH-GISEL-NEXT: s_endpgm
;
; SPREFETCH-GISEL-LABEL: prefetch_inst_sgpr_too_large_offset:
; SPREFETCH-GISEL: ; %bb.0: ; %entry
; SPREFETCH-GISEL-NEXT: s_add_co_u32 s0, s0, 0x800000
; SPREFETCH-GISEL-NEXT: s_add_co_ci_u32 s1, s1, 0
; SPREFETCH-GISEL-NEXT: s_prefetch_inst s[0:1], 0x0, null, 0
; SPREFETCH-GISEL-NEXT: s_endpgm
entry:
%gep = getelementptr i8, ptr addrspace(4) %ptr, i32 8388608
tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 0)
ret void
}
; Check cache locality
; Data prefetch through a non-inreg generic pointer with the third (locality)
; operand set to 1. All gfx1250 runs expect flat_prefetch_b8 with
; scope:SCOPE_DEV; every other run expects no prefetch instruction.
define amdgpu_ps void @prefetch_data_vgpr_flat_dev(ptr %ptr) {
; GFX1250-LABEL: prefetch_data_vgpr_flat_dev:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: flat_prefetch_b8 v[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-LABEL: prefetch_data_vgpr_flat_dev:
; GFX1250-SPREFETCH: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-NEXT: flat_prefetch_b8 v[0:1] scope:SCOPE_DEV
; GFX1250-SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_vgpr_flat_dev:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
;
; GFX12-SPREFETCH-LABEL: prefetch_data_vgpr_flat_dev:
; GFX12-SPREFETCH: ; %bb.0: ; %entry
; GFX12-SPREFETCH-NEXT: s_endpgm
entry:
tail call void @llvm.prefetch.pf(ptr %ptr, i32 0, i32 1, i32 1)
ret void
}
; Data prefetch through a non-inreg generic pointer with the third (locality)
; operand set to 2. All gfx1250 runs expect flat_prefetch_b8 with
; scope:SCOPE_SE; every other run expects no prefetch instruction.
define amdgpu_ps void @prefetch_data_vgpr_flat_se(ptr %ptr) {
; GFX1250-LABEL: prefetch_data_vgpr_flat_se:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: flat_prefetch_b8 v[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-LABEL: prefetch_data_vgpr_flat_se:
; GFX1250-SPREFETCH: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-NEXT: flat_prefetch_b8 v[0:1] scope:SCOPE_SE
; GFX1250-SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_vgpr_flat_se:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
;
; GFX12-SPREFETCH-LABEL: prefetch_data_vgpr_flat_se:
; GFX12-SPREFETCH: ; %bb.0: ; %entry
; GFX12-SPREFETCH-NEXT: s_endpgm
entry:
tail call void @llvm.prefetch.pf(ptr %ptr, i32 0, i32 2, i32 1)
ret void
}
; Data prefetch through a non-inreg generic pointer with the third (locality)
; operand set to 3. gfx1250 runs without safe-cu-prefetch expect the same
; scope:SCOPE_SE output as locality 2, while the safe-cu-prefetch runs expect
; flat_prefetch_b8 with no scope modifier printed. Non-gfx1250 runs expect no
; prefetch instruction.
define amdgpu_ps void @prefetch_data_vgpr_flat_cu(ptr %ptr) {
; GL2-ONLY-LABEL: prefetch_data_vgpr_flat_cu:
; GL2-ONLY: ; %bb.0: ; %entry
; GL2-ONLY-NEXT: flat_prefetch_b8 v[0:1] scope:SCOPE_SE
; GL2-ONLY-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-LABEL: prefetch_data_vgpr_flat_cu:
; GFX1250-SPREFETCH: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-NEXT: flat_prefetch_b8 v[0:1] scope:SCOPE_SE
; GFX1250-SPREFETCH-NEXT: s_endpgm
;
; SAFE-CU-LABEL: prefetch_data_vgpr_flat_cu:
; SAFE-CU: ; %bb.0: ; %entry
; SAFE-CU-NEXT: flat_prefetch_b8 v[0:1]
; SAFE-CU-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_vgpr_flat_cu:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
;
; GFX12-SPREFETCH-LABEL: prefetch_data_vgpr_flat_cu:
; GFX12-SPREFETCH: ; %bb.0: ; %entry
; GFX12-SPREFETCH-NEXT: s_endpgm
entry:
tail call void @llvm.prefetch.pf(ptr %ptr, i32 0, i32 3, i32 1)
ret void
}
; flat offset
; Data prefetch through a GEP of 128 floats (512 bytes) from a non-inreg
; generic pointer. All gfx1250 runs expect the displacement folded into
; flat_prefetch_b8 as offset:512; every other run expects no prefetch
; instruction.
define amdgpu_ps void @prefetch_data_vgpr_flat_offset(ptr %ptr) {
; GFX1250-LABEL: prefetch_data_vgpr_flat_offset:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: flat_prefetch_b8 v[0:1] offset:512 scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-LABEL: prefetch_data_vgpr_flat_offset:
; GFX1250-SPREFETCH: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-NEXT: flat_prefetch_b8 v[0:1] offset:512 scope:SCOPE_SYS
; GFX1250-SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_vgpr_flat_offset:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
;
; GFX12-SPREFETCH-LABEL: prefetch_data_vgpr_flat_offset:
; GFX12-SPREFETCH: ; %bb.0: ; %entry
; GFX12-SPREFETCH-NEXT: s_endpgm
entry:
%gep = getelementptr float, ptr %ptr, i32 128
tail call void @llvm.prefetch.pf(ptr %gep, i32 0, i32 0, i32 1)
ret void
}
; Data prefetch through a GEP of 128 floats (512 bytes) from a non-inreg
; addrspace(1) pointer. All gfx1250 runs expect the displacement folded into
; global_prefetch_b8 as offset:512; every other run expects no prefetch
; instruction.
define amdgpu_ps void @prefetch_data_vgpr_global_offset(ptr addrspace(1) %ptr) {
; GFX1250-LABEL: prefetch_data_vgpr_global_offset:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_prefetch_b8 v[0:1], off offset:512 scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-LABEL: prefetch_data_vgpr_global_offset:
; GFX1250-SPREFETCH: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-NEXT: global_prefetch_b8 v[0:1], off offset:512 scope:SCOPE_SYS
; GFX1250-SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_vgpr_global_offset:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
;
; GFX12-SPREFETCH-LABEL: prefetch_data_vgpr_global_offset:
; GFX12-SPREFETCH: ; %bb.0: ; %entry
; GFX12-SPREFETCH-NEXT: s_endpgm
entry:
%gep = getelementptr float, ptr addrspace(1) %ptr, i32 128
tail call void @llvm.prefetch.p1(ptr addrspace(1) %gep, i32 0, i32 0, i32 1)
ret void
}
; Data prefetch through an inreg addrspace(1) base indexed by a non-inreg i32
; byte offset. All gfx1250 runs expect global_prefetch_b8 with v0 as the offset
; register and s[0:1] as the base; every other run expects no prefetch
; instruction.
define amdgpu_ps void @prefetch_data_vgpr_global_saddr(ptr addrspace(1) inreg %ptr, i32 %voffset) {
; GFX1250-LABEL: prefetch_data_vgpr_global_saddr:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_prefetch_b8 v0, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-LABEL: prefetch_data_vgpr_global_saddr:
; GFX1250-SPREFETCH: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-NEXT: global_prefetch_b8 v0, s[0:1] scope:SCOPE_SYS
; GFX1250-SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_vgpr_global_saddr:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
;
; GFX12-SPREFETCH-LABEL: prefetch_data_vgpr_global_saddr:
; GFX12-SPREFETCH: ; %bb.0: ; %entry
; GFX12-SPREFETCH-NEXT: s_endpgm
entry:
%gep = getelementptr i8, ptr addrspace(1) %ptr, i32 %voffset
tail call void @llvm.prefetch.p1(ptr addrspace(1) %gep, i32 0, i32 0, i32 1)
ret void
}
; Data prefetch through an inreg addrspace(1) base indexed by a non-inreg i32
; byte offset and then a constant 128 bytes. All gfx1250 runs expect
; global_prefetch_b8 with v0 as the offset register, s[0:1] as the base and the
; constant folded in as offset:128; every other run expects no prefetch
; instruction.
define amdgpu_ps void @prefetch_data_vgpr_global_saddr_offset(ptr addrspace(1) inreg %ptr, i32 %voffset) {
; GFX1250-LABEL: prefetch_data_vgpr_global_saddr_offset:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_prefetch_b8 v0, s[0:1] offset:128 scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-LABEL: prefetch_data_vgpr_global_saddr_offset:
; GFX1250-SPREFETCH: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-NEXT: global_prefetch_b8 v0, s[0:1] offset:128 scope:SCOPE_SYS
; GFX1250-SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_vgpr_global_saddr_offset:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
;
; GFX12-SPREFETCH-LABEL: prefetch_data_vgpr_global_saddr_offset:
; GFX12-SPREFETCH: ; %bb.0: ; %entry
; GFX12-SPREFETCH-NEXT: s_endpgm
entry:
%gep1 = getelementptr i8, ptr addrspace(1) %ptr, i32 %voffset
%gep2 = getelementptr i8, ptr addrspace(1) %gep1, i32 128
tail call void @llvm.prefetch.p1(ptr addrspace(1) %gep2, i32 0, i32 0, i32 1)
ret void
}
; Cannot prefetch I$ with flat or global instructions.
; Instruction prefetch (final operand 0) through a non-inreg addrspace(1)
; pointer. Every run configuration shares one check group and expects the
; function to contain only s_endpgm.
define amdgpu_ps void @prefetch_inst_vgpr_global(ptr addrspace(1) %ptr) {
; GCN-LABEL: prefetch_inst_vgpr_global:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_endpgm
entry:
tail call void @llvm.prefetch.p1(ptr addrspace(1) %ptr, i32 0, i32 0, i32 0)
ret void
}
; Instruction prefetch (final operand 0) through a non-inreg generic pointer.
; Every run configuration shares one check group and expects the function to
; contain only s_endpgm.
define amdgpu_ps void @prefetch_inst_vgpr_flat(ptr %ptr) {
; GCN-LABEL: prefetch_inst_vgpr_flat:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_endpgm
entry:
tail call void @llvm.prefetch.pf(ptr %ptr, i32 0, i32 0, i32 0)
ret void
}
; Force vector prefetch for uniform address with rw = 1 argument.
; Data prefetch through an inreg generic pointer with the second (rw) operand
; set to 1. All gfx1250 runs, including those with safe-smem-prefetch, expect
; flat_prefetch_b8 rather than s_prefetch_data. The gfx1200 safe-smem-prefetch
; runs still expect s_prefetch_data, and the remaining runs expect no prefetch
; instruction.
define amdgpu_ps void @prefetch_data_sgpr_flat_force_vector(ptr inreg %ptr) {
; GFX1250-LABEL: prefetch_data_sgpr_flat_force_vector:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: flat_prefetch_b8 v0, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-LABEL: prefetch_data_sgpr_flat_force_vector:
; GFX1250-SPREFETCH: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-SPREFETCH-NEXT: flat_prefetch_b8 v0, s[0:1] scope:SCOPE_SYS
; GFX1250-SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_sgpr_flat_force_vector:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
;
; GFX12-SPREFETCH-LABEL: prefetch_data_sgpr_flat_force_vector:
; GFX12-SPREFETCH: ; %bb.0: ; %entry
; GFX12-SPREFETCH-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
; GFX12-SPREFETCH-NEXT: s_endpgm
entry:
tail call void @llvm.prefetch.pf(ptr %ptr, i32 1, i32 0, i32 1)
ret void
}
; Data prefetch through an inreg addrspace(1) pointer with the second (rw)
; operand set to 1. All gfx1250 runs, including those with safe-smem-prefetch,
; expect global_prefetch_b8 rather than s_prefetch_data. The gfx1200
; safe-smem-prefetch runs still expect s_prefetch_data, and the remaining runs
; expect no prefetch instruction.
define amdgpu_ps void @prefetch_data_sgpr_global_force_vector(ptr addrspace(1) inreg %ptr) {
; GFX1250-LABEL: prefetch_data_sgpr_global_force_vector:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: global_prefetch_b8 v0, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-LABEL: prefetch_data_sgpr_global_force_vector:
; GFX1250-SPREFETCH: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-SPREFETCH-NEXT: global_prefetch_b8 v0, s[0:1] scope:SCOPE_SYS
; GFX1250-SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_sgpr_global_force_vector:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
;
; GFX12-SPREFETCH-LABEL: prefetch_data_sgpr_global_force_vector:
; GFX12-SPREFETCH: ; %bb.0: ; %entry
; GFX12-SPREFETCH-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
; GFX12-SPREFETCH-NEXT: s_endpgm
entry:
tail call void @llvm.prefetch.p1(ptr addrspace(1) %ptr, i32 1, i32 0, i32 1)
ret void
}
; Data prefetch with the second (rw) operand set to 1, through a 1024-byte GEP
; from an inreg addrspace(1) pointer. All gfx1250 runs expect
; global_prefetch_b8 with the displacement folded in as offset:1024. The
; gfx1200 safe-smem-prefetch runs expect s_prefetch_data with offset 0x400, and
; the remaining runs expect no prefetch instruction.
define amdgpu_ps void @prefetch_data_sgpr_global_saddr_force_vector(ptr addrspace(1) inreg %ptr) {
; GFX1250-LABEL: prefetch_data_sgpr_global_saddr_force_vector:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: global_prefetch_b8 v0, s[0:1] offset:1024 scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
;
; GFX1250-SPREFETCH-LABEL: prefetch_data_sgpr_global_saddr_force_vector:
; GFX1250-SPREFETCH: ; %bb.0: ; %entry
; GFX1250-SPREFETCH-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-SPREFETCH-NEXT: global_prefetch_b8 v0, s[0:1] offset:1024 scope:SCOPE_SYS
; GFX1250-SPREFETCH-NEXT: s_endpgm
;
; NOSPREFETCH-LABEL: prefetch_data_sgpr_global_saddr_force_vector:
; NOSPREFETCH: ; %bb.0: ; %entry
; NOSPREFETCH-NEXT: s_endpgm
;
; GFX12-SPREFETCH-LABEL: prefetch_data_sgpr_global_saddr_force_vector:
; GFX12-SPREFETCH: ; %bb.0: ; %entry
; GFX12-SPREFETCH-NEXT: s_prefetch_data s[0:1], 0x400, null, 0
; GFX12-SPREFETCH-NEXT: s_endpgm
entry:
%gep = getelementptr i8, ptr addrspace(1) %ptr, i32 1024
tail call void @llvm.prefetch.p1(ptr addrspace(1) %gep, i32 1, i32 0, i32 1)
ret void
}
; Declarations of the llvm.prefetch overloads called by the tests in this
; file, one per pointer address space (generic, 1, 3, 4, 5 and 6). In the calls
; in this file the three i32 operands are used as rw, locality and cache type
; (0 for the instruction-prefetch tests, 1 for the data-prefetch tests).
declare void @llvm.prefetch.pf(ptr nocapture readonly, i32, i32, i32)
declare void @llvm.prefetch.p1(ptr addrspace(1) nocapture readonly, i32, i32, i32)
declare void @llvm.prefetch.p3(ptr addrspace(3) nocapture readonly, i32, i32, i32)
declare void @llvm.prefetch.p4(ptr addrspace(4) nocapture readonly, i32, i32, i32)
declare void @llvm.prefetch.p5(ptr addrspace(5) nocapture readonly, i32, i32, i32)
declare void @llvm.prefetch.p6(ptr addrspace(6) nocapture readonly, i32, i32, i32)