And rework the lit64() support to use it. The rules for when to add lit64() can be simplified and improved. In this change, however, we just follow the existing conventions on the assembler and disassembler sides. In codegen we do not (and normally should not need to) add explicit lit() and lit64() modifiers, so the codegen tests lose them. The change is an NFCI otherwise. Simplifies printing operands.
787 lines
30 KiB
LLVM
787 lines
30 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GCN,GFX1250,GL2-ONLY %s
|
|
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 -mattr=+safe-smem-prefetch < %s | FileCheck --check-prefixes=GCN,SPREFETCH,GFX1250-SPREFETCH,GFX1250-SPREFETCH-SDAG %s
|
|
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 -mattr=+safe-cu-prefetch < %s | FileCheck --check-prefixes=GCN,GFX1250,SAFE-CU %s
|
|
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GCN,NOSPREFETCH %s
|
|
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=+safe-smem-prefetch < %s | FileCheck --check-prefixes=GCN,SPREFETCH,GFX12-SPREFETCH,SPREFETCH-SDAG %s
|
|
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GCN,NOSPREFETCH %s
|
|
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GCN,GFX1250,GL2-ONLY %s
|
|
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 -mattr=+safe-smem-prefetch < %s | FileCheck --check-prefixes=GCN,SPREFETCH,GFX1250-SPREFETCH,GFX1250-SPREFETCH-GISEL %s
|
|
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 -mattr=+safe-cu-prefetch < %s | FileCheck --check-prefixes=GCN,GFX1250,SAFE-CU %s
|
|
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GCN,NOSPREFETCH %s
|
|
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -mattr=+safe-smem-prefetch < %s | FileCheck --check-prefixes=GCN,SPREFETCH,GFX12-SPREFETCH,SPREFETCH-GISEL %s
|
|
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GCN,NOSPREFETCH %s
|
|
|
|
; Scalar data prefetch
|
|
|
|
; Data prefetch (last intrinsic argument = 1) of an `inreg` addrspace(4) pointer with no offset.
; Expected: GFX1250 -> global_prefetch_b8, SPREFETCH -> s_prefetch_data, NOSPREFETCH -> only s_endpgm.
define amdgpu_ps void @prefetch_data_sgpr(ptr addrspace(4) inreg %ptr) {
|
|
; GFX1250-LABEL: prefetch_data_sgpr:
|
|
; GFX1250: ; %bb.0: ; %entry
|
|
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
|
|
; GFX1250-NEXT: global_prefetch_b8 v0, s[0:1] scope:SCOPE_SYS
|
|
; GFX1250-NEXT: s_endpgm
|
|
;
|
|
; SPREFETCH-LABEL: prefetch_data_sgpr:
|
|
; SPREFETCH: ; %bb.0: ; %entry
|
|
; SPREFETCH-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
|
|
; SPREFETCH-NEXT: s_endpgm
|
|
;
|
|
; NOSPREFETCH-LABEL: prefetch_data_sgpr:
|
|
; NOSPREFETCH: ; %bb.0: ; %entry
|
|
; NOSPREFETCH-NEXT: s_endpgm
|
|
entry:
|
|
tail call void @llvm.prefetch.p4(ptr addrspace(4) %ptr, i32 0, i32 0, i32 1)
|
|
ret void
|
|
}
|
|
|
|
; Data prefetch of an `inreg` addrspace(4) pointer advanced by 128 floats (512 bytes).
; The CHECK lines expect the constant folded into the instruction (offset:512 / 0x200).
define amdgpu_ps void @prefetch_data_sgpr_offset(ptr addrspace(4) inreg %ptr) {
|
|
; GFX1250-LABEL: prefetch_data_sgpr_offset:
|
|
; GFX1250: ; %bb.0: ; %entry
|
|
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
|
|
; GFX1250-NEXT: global_prefetch_b8 v0, s[0:1] offset:512 scope:SCOPE_SYS
|
|
; GFX1250-NEXT: s_endpgm
|
|
;
|
|
; SPREFETCH-LABEL: prefetch_data_sgpr_offset:
|
|
; SPREFETCH: ; %bb.0: ; %entry
|
|
; SPREFETCH-NEXT: s_prefetch_data s[0:1], 0x200, null, 0
|
|
; SPREFETCH-NEXT: s_endpgm
|
|
;
|
|
; NOSPREFETCH-LABEL: prefetch_data_sgpr_offset:
|
|
; NOSPREFETCH: ; %bb.0: ; %entry
|
|
; NOSPREFETCH-NEXT: s_endpgm
|
|
entry:
|
|
%gep = getelementptr float, ptr addrspace(4) %ptr, i32 128
|
|
tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 1)
|
|
ret void
|
|
}
|
|
|
|
; Check large offsets
|
|
|
|
; Data prefetch with a byte offset of 8388607 (0x7fffff) on an `inreg` addrspace(4) pointer.
; The CHECK lines expect this offset to still be folded into the prefetch instruction.
define amdgpu_ps void @prefetch_data_sgpr_max_offset(ptr addrspace(4) inreg %ptr) {
|
|
; GFX1250-LABEL: prefetch_data_sgpr_max_offset:
|
|
; GFX1250: ; %bb.0: ; %entry
|
|
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
|
|
; GFX1250-NEXT: global_prefetch_b8 v0, s[0:1] offset:8388607 scope:SCOPE_SYS
|
|
; GFX1250-NEXT: s_endpgm
|
|
;
|
|
; SPREFETCH-LABEL: prefetch_data_sgpr_max_offset:
|
|
; SPREFETCH: ; %bb.0: ; %entry
|
|
; SPREFETCH-NEXT: s_prefetch_data s[0:1], 0x7fffff, null, 0
|
|
; SPREFETCH-NEXT: s_endpgm
|
|
;
|
|
; NOSPREFETCH-LABEL: prefetch_data_sgpr_max_offset:
|
|
; NOSPREFETCH: ; %bb.0: ; %entry
|
|
; NOSPREFETCH-NEXT: s_endpgm
|
|
entry:
|
|
%gep = getelementptr i8, ptr addrspace(4) %ptr, i32 8388607
|
|
tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 1)
|
|
ret void
|
|
}
|
|
|
|
; Data prefetch with a byte offset of -8388608 on an `inreg` addrspace(4) pointer.
; GFX1250 folds it into global_prefetch_b8; the *SPREFETCH* variants instead add it to the
; base with scalar adds (SDAG and GlobalISel differ) and issue s_prefetch_data with offset 0x0.
define amdgpu_ps void @prefetch_data_sgpr_min_offset(ptr addrspace(4) inreg %ptr) {
|
|
; GFX1250-LABEL: prefetch_data_sgpr_min_offset:
|
|
; GFX1250: ; %bb.0: ; %entry
|
|
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
|
|
; GFX1250-NEXT: global_prefetch_b8 v0, s[0:1] offset:-8388608 scope:SCOPE_SYS
|
|
; GFX1250-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-SPREFETCH-SDAG-LABEL: prefetch_data_sgpr_min_offset:
|
|
; GFX1250-SPREFETCH-SDAG: ; %bb.0: ; %entry
|
|
; GFX1250-SPREFETCH-SDAG-NEXT: s_mov_b64 s[2:3], 0xffffffffff800000
|
|
; GFX1250-SPREFETCH-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
|
; GFX1250-SPREFETCH-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3]
|
|
; GFX1250-SPREFETCH-SDAG-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
|
|
; GFX1250-SPREFETCH-SDAG-NEXT: s_endpgm
|
|
;
|
|
; NOSPREFETCH-LABEL: prefetch_data_sgpr_min_offset:
|
|
; NOSPREFETCH: ; %bb.0: ; %entry
|
|
; NOSPREFETCH-NEXT: s_endpgm
|
|
;
|
|
; SPREFETCH-SDAG-LABEL: prefetch_data_sgpr_min_offset:
|
|
; SPREFETCH-SDAG: ; %bb.0: ; %entry
|
|
; SPREFETCH-SDAG-NEXT: s_mov_b32 s2, 0xff800000
|
|
; SPREFETCH-SDAG-NEXT: s_mov_b32 s3, -1
|
|
; SPREFETCH-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
|
; SPREFETCH-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3]
|
|
; SPREFETCH-SDAG-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
|
|
; SPREFETCH-SDAG-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-SPREFETCH-GISEL-LABEL: prefetch_data_sgpr_min_offset:
|
|
; GFX1250-SPREFETCH-GISEL: ; %bb.0: ; %entry
|
|
; GFX1250-SPREFETCH-GISEL-NEXT: s_add_co_u32 s0, s0, 0xff800000
|
|
; GFX1250-SPREFETCH-GISEL-NEXT: s_add_co_ci_u32 s1, s1, -1
|
|
; GFX1250-SPREFETCH-GISEL-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
|
|
; GFX1250-SPREFETCH-GISEL-NEXT: s_endpgm
|
|
;
|
|
; SPREFETCH-GISEL-LABEL: prefetch_data_sgpr_min_offset:
|
|
; SPREFETCH-GISEL: ; %bb.0: ; %entry
|
|
; SPREFETCH-GISEL-NEXT: s_add_co_u32 s0, s0, 0xff800000
|
|
; SPREFETCH-GISEL-NEXT: s_add_co_ci_u32 s1, s1, -1
|
|
; SPREFETCH-GISEL-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
|
|
; SPREFETCH-GISEL-NEXT: s_endpgm
|
|
entry:
|
|
%gep = getelementptr i8, ptr addrspace(4) %ptr, i32 -8388608
|
|
tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 1)
|
|
ret void
|
|
}
|
|
|
|
; Data prefetch with a byte offset of 8388608 (0x800000), one past the value used in
; prefetch_data_sgpr_max_offset. No variant folds it into the instruction's offset field:
; GFX1250 materializes it in v0, the *SPREFETCH* variants add it to the scalar base first.
define amdgpu_ps void @prefetch_data_sgpr_too_large_offset(ptr addrspace(4) inreg %ptr) {
|
|
; GFX1250-LABEL: prefetch_data_sgpr_too_large_offset:
|
|
; GFX1250: ; %bb.0: ; %entry
|
|
; GFX1250-NEXT: v_mov_b32_e32 v0, 0x800000
|
|
; GFX1250-NEXT: global_prefetch_b8 v0, s[0:1] scope:SCOPE_SYS
|
|
; GFX1250-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-SPREFETCH-SDAG-LABEL: prefetch_data_sgpr_too_large_offset:
|
|
; GFX1250-SPREFETCH-SDAG: ; %bb.0: ; %entry
|
|
; GFX1250-SPREFETCH-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], 0x800000
|
|
; GFX1250-SPREFETCH-SDAG-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
|
|
; GFX1250-SPREFETCH-SDAG-NEXT: s_endpgm
|
|
;
|
|
; NOSPREFETCH-LABEL: prefetch_data_sgpr_too_large_offset:
|
|
; NOSPREFETCH: ; %bb.0: ; %entry
|
|
; NOSPREFETCH-NEXT: s_endpgm
|
|
;
|
|
; SPREFETCH-SDAG-LABEL: prefetch_data_sgpr_too_large_offset:
|
|
; SPREFETCH-SDAG: ; %bb.0: ; %entry
|
|
; SPREFETCH-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], 0x800000
|
|
; SPREFETCH-SDAG-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
|
|
; SPREFETCH-SDAG-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-SPREFETCH-GISEL-LABEL: prefetch_data_sgpr_too_large_offset:
|
|
; GFX1250-SPREFETCH-GISEL: ; %bb.0: ; %entry
|
|
; GFX1250-SPREFETCH-GISEL-NEXT: s_add_co_u32 s0, s0, 0x800000
|
|
; GFX1250-SPREFETCH-GISEL-NEXT: s_add_co_ci_u32 s1, s1, 0
|
|
; GFX1250-SPREFETCH-GISEL-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
|
|
; GFX1250-SPREFETCH-GISEL-NEXT: s_endpgm
|
|
;
|
|
; SPREFETCH-GISEL-LABEL: prefetch_data_sgpr_too_large_offset:
|
|
; SPREFETCH-GISEL: ; %bb.0: ; %entry
|
|
; SPREFETCH-GISEL-NEXT: s_add_co_u32 s0, s0, 0x800000
|
|
; SPREFETCH-GISEL-NEXT: s_add_co_ci_u32 s1, s1, 0
|
|
; SPREFETCH-GISEL-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
|
|
; SPREFETCH-GISEL-NEXT: s_endpgm
|
|
entry:
|
|
%gep = getelementptr i8, ptr addrspace(4) %ptr, i32 8388608
|
|
tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 1)
|
|
ret void
|
|
}
|
|
|
|
; Check divergent address
|
|
|
|
; Data prefetch of a non-`inreg` addrspace(1) pointer (held in v[0:1] per the CHECK lines).
; GFX1250 and GFX1250-SPREFETCH expect global_prefetch_b8; the other prefixes expect only s_endpgm.
define amdgpu_ps void @prefetch_data_vgpr_global(ptr addrspace(1) %ptr) {
|
|
; GFX1250-LABEL: prefetch_data_vgpr_global:
|
|
; GFX1250: ; %bb.0: ; %entry
|
|
; GFX1250-NEXT: global_prefetch_b8 v[0:1], off scope:SCOPE_SYS
|
|
; GFX1250-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-SPREFETCH-LABEL: prefetch_data_vgpr_global:
|
|
; GFX1250-SPREFETCH: ; %bb.0: ; %entry
|
|
; GFX1250-SPREFETCH-NEXT: global_prefetch_b8 v[0:1], off scope:SCOPE_SYS
|
|
; GFX1250-SPREFETCH-NEXT: s_endpgm
|
|
;
|
|
; NOSPREFETCH-LABEL: prefetch_data_vgpr_global:
|
|
; NOSPREFETCH: ; %bb.0: ; %entry
|
|
; NOSPREFETCH-NEXT: s_endpgm
|
|
;
|
|
; GFX12-SPREFETCH-LABEL: prefetch_data_vgpr_global:
|
|
; GFX12-SPREFETCH: ; %bb.0: ; %entry
|
|
; GFX12-SPREFETCH-NEXT: s_endpgm
|
|
entry:
|
|
tail call void @llvm.prefetch.p1(ptr addrspace(1) %ptr, i32 0, i32 0, i32 1)
|
|
ret void
|
|
}
|
|
|
|
; Data prefetch of a non-`inreg` generic (`ptr`) pointer held in v[0:1].
; GFX1250 and GFX1250-SPREFETCH expect flat_prefetch_b8; the other prefixes expect only s_endpgm.
define amdgpu_ps void @prefetch_data_vgpr_flat(ptr %ptr) {
|
|
; GFX1250-LABEL: prefetch_data_vgpr_flat:
|
|
; GFX1250: ; %bb.0: ; %entry
|
|
; GFX1250-NEXT: flat_prefetch_b8 v[0:1] scope:SCOPE_SYS
|
|
; GFX1250-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-SPREFETCH-LABEL: prefetch_data_vgpr_flat:
|
|
; GFX1250-SPREFETCH: ; %bb.0: ; %entry
|
|
; GFX1250-SPREFETCH-NEXT: flat_prefetch_b8 v[0:1] scope:SCOPE_SYS
|
|
; GFX1250-SPREFETCH-NEXT: s_endpgm
|
|
;
|
|
; NOSPREFETCH-LABEL: prefetch_data_vgpr_flat:
|
|
; NOSPREFETCH: ; %bb.0: ; %entry
|
|
; NOSPREFETCH-NEXT: s_endpgm
|
|
;
|
|
; GFX12-SPREFETCH-LABEL: prefetch_data_vgpr_flat:
|
|
; GFX12-SPREFETCH: ; %bb.0: ; %entry
|
|
; GFX12-SPREFETCH-NEXT: s_endpgm
|
|
entry:
|
|
tail call void @llvm.prefetch.pf(ptr %ptr, i32 0, i32 0, i32 1)
|
|
ret void
|
|
}
|
|
|
|
; Data prefetch of an `inreg` addrspace(1) base plus a non-`inreg` i32 byte offset.
; GFX1250 and GFX1250-SPREFETCH expect the saddr form (global_prefetch_b8 v0, s[0:1]);
; the other prefixes expect only s_endpgm.
; The former GFX12-* / GFX11-* check lines were dropped: no RUN line enables those
; prefixes, so FileCheck never evaluated them.
define amdgpu_ps void @prefetch_data_sgpr_vgpr_offset_global(ptr addrspace(1) inreg %ptr, i32 %offset) {
|
|
; GFX1250-LABEL: prefetch_data_sgpr_vgpr_offset_global:
|
|
; GFX1250: ; %bb.0: ; %entry
|
|
; GFX1250-NEXT: global_prefetch_b8 v0, s[0:1] scope:SCOPE_SYS
|
|
; GFX1250-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-SPREFETCH-LABEL: prefetch_data_sgpr_vgpr_offset_global:
|
|
; GFX1250-SPREFETCH: ; %bb.0: ; %entry
|
|
; GFX1250-SPREFETCH-NEXT: global_prefetch_b8 v0, s[0:1] scope:SCOPE_SYS
|
|
; GFX1250-SPREFETCH-NEXT: s_endpgm
|
|
;
|
|
; NOSPREFETCH-LABEL: prefetch_data_sgpr_vgpr_offset_global:
|
|
; NOSPREFETCH: ; %bb.0: ; %entry
|
|
; NOSPREFETCH-NEXT: s_endpgm
|
|
;
|
|
; GFX12-SPREFETCH-LABEL: prefetch_data_sgpr_vgpr_offset_global:
|
|
; GFX12-SPREFETCH: ; %bb.0: ; %entry
|
|
; GFX12-SPREFETCH-NEXT: s_endpgm
|
|
entry:
|
|
%gep = getelementptr i8, ptr addrspace(1) %ptr, i32 %offset
|
|
tail call void @llvm.prefetch.p1(ptr addrspace(1) %gep, i32 0, i32 0, i32 1)
|
|
ret void
|
|
}
|
|
|
|
; Data prefetch of an `inreg` generic base plus a non-`inreg` i32 byte offset plus a
; constant 128 bytes. GFX1250 and GFX1250-SPREFETCH expect flat_prefetch_b8 v0, s[0:1]
; with offset:128; the other prefixes expect only s_endpgm.
; The former GFX12-* / GFX11-* check lines were dropped: no RUN line enables those
; prefixes, so FileCheck never evaluated them.
define amdgpu_ps void @prefetch_data_sgpr_vgpr_offset_flat(ptr inreg %ptr, i32 %offset) {
|
|
; GFX1250-LABEL: prefetch_data_sgpr_vgpr_offset_flat:
|
|
; GFX1250: ; %bb.0: ; %entry
|
|
; GFX1250-NEXT: flat_prefetch_b8 v0, s[0:1] offset:128 scope:SCOPE_SYS
|
|
; GFX1250-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-SPREFETCH-LABEL: prefetch_data_sgpr_vgpr_offset_flat:
|
|
; GFX1250-SPREFETCH: ; %bb.0: ; %entry
|
|
; GFX1250-SPREFETCH-NEXT: flat_prefetch_b8 v0, s[0:1] offset:128 scope:SCOPE_SYS
|
|
; GFX1250-SPREFETCH-NEXT: s_endpgm
|
|
;
|
|
; NOSPREFETCH-LABEL: prefetch_data_sgpr_vgpr_offset_flat:
|
|
; NOSPREFETCH: ; %bb.0: ; %entry
|
|
; NOSPREFETCH-NEXT: s_endpgm
|
|
;
|
|
; GFX12-SPREFETCH-LABEL: prefetch_data_sgpr_vgpr_offset_flat:
|
|
; GFX12-SPREFETCH: ; %bb.0: ; %entry
|
|
; GFX12-SPREFETCH-NEXT: s_endpgm
|
|
entry:
|
|
%gep1 = getelementptr i8, ptr %ptr, i32 %offset
|
|
%gep2 = getelementptr i8, ptr %gep1, i32 128
|
|
tail call void @llvm.prefetch.pf(ptr %gep2, i32 0, i32 0, i32 1)
|
|
ret void
|
|
}
|
|
|
|
; Check LDS and scratch: these address spaces cannot be prefetched
|
|
|
|
; Data prefetch of an addrspace(3) pointer: every RUN configuration (common GCN prefix)
; is expected to emit only s_endpgm.
define amdgpu_ps void @prefetch_data_lds(ptr addrspace(3) inreg %ptr) {
|
|
; GCN-LABEL: prefetch_data_lds:
|
|
; GCN: ; %bb.0: ; %entry
|
|
; GCN-NEXT: s_endpgm
|
|
entry:
|
|
tail call void @llvm.prefetch.p3(ptr addrspace(3) %ptr, i32 0, i32 0, i32 1)
|
|
ret void
|
|
}
|
|
|
|
; Data prefetch of an addrspace(5) pointer: every RUN configuration (common GCN prefix)
; is expected to emit only s_endpgm.
define amdgpu_ps void @prefetch_data_scratch(ptr addrspace(5) inreg %ptr) {
|
|
; GCN-LABEL: prefetch_data_scratch:
|
|
; GCN: ; %bb.0: ; %entry
|
|
; GCN-NEXT: s_endpgm
|
|
entry:
|
|
tail call void @llvm.prefetch.p5(ptr addrspace(5) %ptr, i32 0, i32 0, i32 1)
|
|
ret void
|
|
}
|
|
|
|
; Check supported address spaces
|
|
|
|
; Data prefetch of an `inreg` generic (`ptr`) pointer.
; Expected: GFX1250 -> flat_prefetch_b8 with a zero v0, SPREFETCH -> s_prefetch_data,
; NOSPREFETCH -> only s_endpgm.
define amdgpu_ps void @prefetch_data_sgpr_flat(ptr inreg %ptr) {
|
|
; GFX1250-LABEL: prefetch_data_sgpr_flat:
|
|
; GFX1250: ; %bb.0: ; %entry
|
|
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
|
|
; GFX1250-NEXT: flat_prefetch_b8 v0, s[0:1] scope:SCOPE_SYS
|
|
; GFX1250-NEXT: s_endpgm
|
|
;
|
|
; SPREFETCH-LABEL: prefetch_data_sgpr_flat:
|
|
; SPREFETCH: ; %bb.0: ; %entry
|
|
; SPREFETCH-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
|
|
; SPREFETCH-NEXT: s_endpgm
|
|
;
|
|
; NOSPREFETCH-LABEL: prefetch_data_sgpr_flat:
|
|
; NOSPREFETCH: ; %bb.0: ; %entry
|
|
; NOSPREFETCH-NEXT: s_endpgm
|
|
entry:
|
|
tail call void @llvm.prefetch.pf(ptr %ptr, i32 0, i32 0, i32 1)
|
|
ret void
|
|
}
|
|
|
|
; Data prefetch of an `inreg` addrspace(1) pointer.
; Expected: GFX1250 -> global_prefetch_b8 with a zero v0, SPREFETCH -> s_prefetch_data,
; NOSPREFETCH -> only s_endpgm.
define amdgpu_ps void @prefetch_data_sgpr_global(ptr addrspace(1) inreg %ptr) {
|
|
; GFX1250-LABEL: prefetch_data_sgpr_global:
|
|
; GFX1250: ; %bb.0: ; %entry
|
|
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
|
|
; GFX1250-NEXT: global_prefetch_b8 v0, s[0:1] scope:SCOPE_SYS
|
|
; GFX1250-NEXT: s_endpgm
|
|
;
|
|
; SPREFETCH-LABEL: prefetch_data_sgpr_global:
|
|
; SPREFETCH: ; %bb.0: ; %entry
|
|
; SPREFETCH-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
|
|
; SPREFETCH-NEXT: s_endpgm
|
|
;
|
|
; NOSPREFETCH-LABEL: prefetch_data_sgpr_global:
|
|
; NOSPREFETCH: ; %bb.0: ; %entry
|
|
; NOSPREFETCH-NEXT: s_endpgm
|
|
entry:
|
|
tail call void @llvm.prefetch.p1(ptr addrspace(1) %ptr, i32 0, i32 0, i32 1)
|
|
ret void
|
|
}
|
|
|
|
; Data prefetch of an `inreg` addrspace(6) pointer.
; SPREFETCH expects the high half (s1) to be zeroed before s_prefetch_data s[0:1];
; GFX1250 and NOSPREFETCH expect only s_endpgm.
define amdgpu_ps void @prefetch_data_sgpr_constant_32bit(ptr addrspace(6) inreg %ptr) {
|
|
; GFX1250-LABEL: prefetch_data_sgpr_constant_32bit:
|
|
; GFX1250: ; %bb.0: ; %entry
|
|
; GFX1250-NEXT: s_endpgm
|
|
;
|
|
; SPREFETCH-LABEL: prefetch_data_sgpr_constant_32bit:
|
|
; SPREFETCH: ; %bb.0: ; %entry
|
|
; SPREFETCH-NEXT: s_mov_b32 s1, 0
|
|
; SPREFETCH-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
|
|
; SPREFETCH-NEXT: s_endpgm
|
|
;
|
|
; NOSPREFETCH-LABEL: prefetch_data_sgpr_constant_32bit:
|
|
; NOSPREFETCH: ; %bb.0: ; %entry
|
|
; NOSPREFETCH-NEXT: s_endpgm
|
|
entry:
|
|
tail call void @llvm.prefetch.p6(ptr addrspace(6) %ptr, i32 0, i32 0, i32 1)
|
|
ret void
|
|
}
|
|
|
|
; I$ prefetch
|
|
|
|
; Instruction-cache prefetch (last intrinsic argument = 0) of an `inreg` addrspace(4) pointer.
; SPREFETCH expects s_prefetch_inst; GFX1250 and NOSPREFETCH expect only s_endpgm.
define amdgpu_ps void @prefetch_inst_sgpr(ptr addrspace(4) inreg %ptr) {
|
|
; GFX1250-LABEL: prefetch_inst_sgpr:
|
|
; GFX1250: ; %bb.0: ; %entry
|
|
; GFX1250-NEXT: s_endpgm
|
|
;
|
|
; SPREFETCH-LABEL: prefetch_inst_sgpr:
|
|
; SPREFETCH: ; %bb.0: ; %entry
|
|
; SPREFETCH-NEXT: s_prefetch_inst s[0:1], 0x0, null, 0
|
|
; SPREFETCH-NEXT: s_endpgm
|
|
;
|
|
; NOSPREFETCH-LABEL: prefetch_inst_sgpr:
|
|
; NOSPREFETCH: ; %bb.0: ; %entry
|
|
; NOSPREFETCH-NEXT: s_endpgm
|
|
entry:
|
|
tail call void @llvm.prefetch.p4(ptr addrspace(4) %ptr, i32 0, i32 0, i32 0)
|
|
ret void
|
|
}
|
|
|
|
; Instruction-cache prefetch with a 128-byte constant offset on an `inreg` addrspace(4) pointer.
; SPREFETCH expects the offset folded into s_prefetch_inst (0x80).
define amdgpu_ps void @prefetch_inst_sgpr_offset(ptr addrspace(4) inreg %ptr) {
|
|
; GFX1250-LABEL: prefetch_inst_sgpr_offset:
|
|
; GFX1250: ; %bb.0: ; %entry
|
|
; GFX1250-NEXT: s_endpgm
|
|
;
|
|
; SPREFETCH-LABEL: prefetch_inst_sgpr_offset:
|
|
; SPREFETCH: ; %bb.0: ; %entry
|
|
; SPREFETCH-NEXT: s_prefetch_inst s[0:1], 0x80, null, 0
|
|
; SPREFETCH-NEXT: s_endpgm
|
|
;
|
|
; NOSPREFETCH-LABEL: prefetch_inst_sgpr_offset:
|
|
; NOSPREFETCH: ; %bb.0: ; %entry
|
|
; NOSPREFETCH-NEXT: s_endpgm
|
|
entry:
|
|
%gep = getelementptr i8, ptr addrspace(4) %ptr, i32 128
|
|
tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 0)
|
|
ret void
|
|
}
|
|
|
|
; Check large offsets
|
|
|
|
; Instruction-cache prefetch with a byte offset of 8388607 (0x7fffff).
; SPREFETCH expects the offset to still be folded into s_prefetch_inst.
define amdgpu_ps void @prefetch_inst_sgpr_max_offset(ptr addrspace(4) inreg %ptr) {
|
|
; GFX1250-LABEL: prefetch_inst_sgpr_max_offset:
|
|
; GFX1250: ; %bb.0: ; %entry
|
|
; GFX1250-NEXT: s_endpgm
|
|
;
|
|
; SPREFETCH-LABEL: prefetch_inst_sgpr_max_offset:
|
|
; SPREFETCH: ; %bb.0: ; %entry
|
|
; SPREFETCH-NEXT: s_prefetch_inst s[0:1], 0x7fffff, null, 0
|
|
; SPREFETCH-NEXT: s_endpgm
|
|
;
|
|
; NOSPREFETCH-LABEL: prefetch_inst_sgpr_max_offset:
|
|
; NOSPREFETCH: ; %bb.0: ; %entry
|
|
; NOSPREFETCH-NEXT: s_endpgm
|
|
entry:
|
|
%gep = getelementptr i8, ptr addrspace(4) %ptr, i32 8388607
|
|
tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 0)
|
|
ret void
|
|
}
|
|
|
|
; Instruction-cache prefetch with a byte offset of -8388608.
; The *SPREFETCH* variants add the offset to the scalar base (SDAG and GlobalISel use
; different add sequences) and issue s_prefetch_inst with offset 0x0; GFX1250 and
; NOSPREFETCH expect only s_endpgm.
define amdgpu_ps void @prefetch_inst_sgpr_min_offset(ptr addrspace(4) inreg %ptr) {
|
|
; GFX1250-LABEL: prefetch_inst_sgpr_min_offset:
|
|
; GFX1250: ; %bb.0: ; %entry
|
|
; GFX1250-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-SPREFETCH-SDAG-LABEL: prefetch_inst_sgpr_min_offset:
|
|
; GFX1250-SPREFETCH-SDAG: ; %bb.0: ; %entry
|
|
; GFX1250-SPREFETCH-SDAG-NEXT: s_mov_b64 s[2:3], 0xffffffffff800000
|
|
; GFX1250-SPREFETCH-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
|
; GFX1250-SPREFETCH-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3]
|
|
; GFX1250-SPREFETCH-SDAG-NEXT: s_prefetch_inst s[0:1], 0x0, null, 0
|
|
; GFX1250-SPREFETCH-SDAG-NEXT: s_endpgm
|
|
;
|
|
; NOSPREFETCH-LABEL: prefetch_inst_sgpr_min_offset:
|
|
; NOSPREFETCH: ; %bb.0: ; %entry
|
|
; NOSPREFETCH-NEXT: s_endpgm
|
|
;
|
|
; SPREFETCH-SDAG-LABEL: prefetch_inst_sgpr_min_offset:
|
|
; SPREFETCH-SDAG: ; %bb.0: ; %entry
|
|
; SPREFETCH-SDAG-NEXT: s_mov_b32 s2, 0xff800000
|
|
; SPREFETCH-SDAG-NEXT: s_mov_b32 s3, -1
|
|
; SPREFETCH-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
|
; SPREFETCH-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3]
|
|
; SPREFETCH-SDAG-NEXT: s_prefetch_inst s[0:1], 0x0, null, 0
|
|
; SPREFETCH-SDAG-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-SPREFETCH-GISEL-LABEL: prefetch_inst_sgpr_min_offset:
|
|
; GFX1250-SPREFETCH-GISEL: ; %bb.0: ; %entry
|
|
; GFX1250-SPREFETCH-GISEL-NEXT: s_add_co_u32 s0, s0, 0xff800000
|
|
; GFX1250-SPREFETCH-GISEL-NEXT: s_add_co_ci_u32 s1, s1, -1
|
|
; GFX1250-SPREFETCH-GISEL-NEXT: s_prefetch_inst s[0:1], 0x0, null, 0
|
|
; GFX1250-SPREFETCH-GISEL-NEXT: s_endpgm
|
|
;
|
|
; SPREFETCH-GISEL-LABEL: prefetch_inst_sgpr_min_offset:
|
|
; SPREFETCH-GISEL: ; %bb.0: ; %entry
|
|
; SPREFETCH-GISEL-NEXT: s_add_co_u32 s0, s0, 0xff800000
|
|
; SPREFETCH-GISEL-NEXT: s_add_co_ci_u32 s1, s1, -1
|
|
; SPREFETCH-GISEL-NEXT: s_prefetch_inst s[0:1], 0x0, null, 0
|
|
; SPREFETCH-GISEL-NEXT: s_endpgm
|
|
entry:
|
|
%gep = getelementptr i8, ptr addrspace(4) %ptr, i32 -8388608
|
|
tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 0)
|
|
ret void
|
|
}
|
|
|
|
; Instruction-cache prefetch with a byte offset of 8388608 (0x800000), one past the value
; used in prefetch_inst_sgpr_max_offset. The *SPREFETCH* variants add it to the scalar base
; and issue s_prefetch_inst with offset 0x0; GFX1250 and NOSPREFETCH expect only s_endpgm.
define amdgpu_ps void @prefetch_inst_sgpr_too_large_offset(ptr addrspace(4) inreg %ptr) {
|
|
; GFX1250-LABEL: prefetch_inst_sgpr_too_large_offset:
|
|
; GFX1250: ; %bb.0: ; %entry
|
|
; GFX1250-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-SPREFETCH-SDAG-LABEL: prefetch_inst_sgpr_too_large_offset:
|
|
; GFX1250-SPREFETCH-SDAG: ; %bb.0: ; %entry
|
|
; GFX1250-SPREFETCH-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], 0x800000
|
|
; GFX1250-SPREFETCH-SDAG-NEXT: s_prefetch_inst s[0:1], 0x0, null, 0
|
|
; GFX1250-SPREFETCH-SDAG-NEXT: s_endpgm
|
|
;
|
|
; NOSPREFETCH-LABEL: prefetch_inst_sgpr_too_large_offset:
|
|
; NOSPREFETCH: ; %bb.0: ; %entry
|
|
; NOSPREFETCH-NEXT: s_endpgm
|
|
;
|
|
; SPREFETCH-SDAG-LABEL: prefetch_inst_sgpr_too_large_offset:
|
|
; SPREFETCH-SDAG: ; %bb.0: ; %entry
|
|
; SPREFETCH-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], 0x800000
|
|
; SPREFETCH-SDAG-NEXT: s_prefetch_inst s[0:1], 0x0, null, 0
|
|
; SPREFETCH-SDAG-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-SPREFETCH-GISEL-LABEL: prefetch_inst_sgpr_too_large_offset:
|
|
; GFX1250-SPREFETCH-GISEL: ; %bb.0: ; %entry
|
|
; GFX1250-SPREFETCH-GISEL-NEXT: s_add_co_u32 s0, s0, 0x800000
|
|
; GFX1250-SPREFETCH-GISEL-NEXT: s_add_co_ci_u32 s1, s1, 0
|
|
; GFX1250-SPREFETCH-GISEL-NEXT: s_prefetch_inst s[0:1], 0x0, null, 0
|
|
; GFX1250-SPREFETCH-GISEL-NEXT: s_endpgm
|
|
;
|
|
; SPREFETCH-GISEL-LABEL: prefetch_inst_sgpr_too_large_offset:
|
|
; SPREFETCH-GISEL: ; %bb.0: ; %entry
|
|
; SPREFETCH-GISEL-NEXT: s_add_co_u32 s0, s0, 0x800000
|
|
; SPREFETCH-GISEL-NEXT: s_add_co_ci_u32 s1, s1, 0
|
|
; SPREFETCH-GISEL-NEXT: s_prefetch_inst s[0:1], 0x0, null, 0
|
|
; SPREFETCH-GISEL-NEXT: s_endpgm
|
|
entry:
|
|
%gep = getelementptr i8, ptr addrspace(4) %ptr, i32 8388608
|
|
tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 0)
|
|
ret void
|
|
}
|
|
|
|
; Check cache locality
|
|
|
|
; Data prefetch of a generic pointer with the third intrinsic argument (locality) = 1.
; GFX1250 and GFX1250-SPREFETCH expect flat_prefetch_b8 with scope:SCOPE_DEV.
define amdgpu_ps void @prefetch_data_vgpr_flat_dev(ptr %ptr) {
|
|
; GFX1250-LABEL: prefetch_data_vgpr_flat_dev:
|
|
; GFX1250: ; %bb.0: ; %entry
|
|
; GFX1250-NEXT: flat_prefetch_b8 v[0:1] scope:SCOPE_DEV
|
|
; GFX1250-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-SPREFETCH-LABEL: prefetch_data_vgpr_flat_dev:
|
|
; GFX1250-SPREFETCH: ; %bb.0: ; %entry
|
|
; GFX1250-SPREFETCH-NEXT: flat_prefetch_b8 v[0:1] scope:SCOPE_DEV
|
|
; GFX1250-SPREFETCH-NEXT: s_endpgm
|
|
;
|
|
; NOSPREFETCH-LABEL: prefetch_data_vgpr_flat_dev:
|
|
; NOSPREFETCH: ; %bb.0: ; %entry
|
|
; NOSPREFETCH-NEXT: s_endpgm
|
|
;
|
|
; GFX12-SPREFETCH-LABEL: prefetch_data_vgpr_flat_dev:
|
|
; GFX12-SPREFETCH: ; %bb.0: ; %entry
|
|
; GFX12-SPREFETCH-NEXT: s_endpgm
|
|
entry:
|
|
tail call void @llvm.prefetch.pf(ptr %ptr, i32 0, i32 1, i32 1)
|
|
ret void
|
|
}
|
|
|
|
; Data prefetch of a generic pointer with the third intrinsic argument (locality) = 2.
; GFX1250 and GFX1250-SPREFETCH expect flat_prefetch_b8 with scope:SCOPE_SE.
define amdgpu_ps void @prefetch_data_vgpr_flat_se(ptr %ptr) {
|
|
; GFX1250-LABEL: prefetch_data_vgpr_flat_se:
|
|
; GFX1250: ; %bb.0: ; %entry
|
|
; GFX1250-NEXT: flat_prefetch_b8 v[0:1] scope:SCOPE_SE
|
|
; GFX1250-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-SPREFETCH-LABEL: prefetch_data_vgpr_flat_se:
|
|
; GFX1250-SPREFETCH: ; %bb.0: ; %entry
|
|
; GFX1250-SPREFETCH-NEXT: flat_prefetch_b8 v[0:1] scope:SCOPE_SE
|
|
; GFX1250-SPREFETCH-NEXT: s_endpgm
|
|
;
|
|
; NOSPREFETCH-LABEL: prefetch_data_vgpr_flat_se:
|
|
; NOSPREFETCH: ; %bb.0: ; %entry
|
|
; NOSPREFETCH-NEXT: s_endpgm
|
|
;
|
|
; GFX12-SPREFETCH-LABEL: prefetch_data_vgpr_flat_se:
|
|
; GFX12-SPREFETCH: ; %bb.0: ; %entry
|
|
; GFX12-SPREFETCH-NEXT: s_endpgm
|
|
entry:
|
|
tail call void @llvm.prefetch.pf(ptr %ptr, i32 0, i32 2, i32 1)
|
|
ret void
|
|
}
|
|
|
|
; Data prefetch of a generic pointer with the third intrinsic argument (locality) = 3.
; GL2-ONLY and GFX1250-SPREFETCH expect scope:SCOPE_SE, while SAFE-CU (the
; +safe-cu-prefetch RUN lines) expects flat_prefetch_b8 with no scope modifier.
define amdgpu_ps void @prefetch_data_vgpr_flat_cu(ptr %ptr) {
|
|
; GL2-ONLY-LABEL: prefetch_data_vgpr_flat_cu:
|
|
; GL2-ONLY: ; %bb.0: ; %entry
|
|
; GL2-ONLY-NEXT: flat_prefetch_b8 v[0:1] scope:SCOPE_SE
|
|
; GL2-ONLY-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-SPREFETCH-LABEL: prefetch_data_vgpr_flat_cu:
|
|
; GFX1250-SPREFETCH: ; %bb.0: ; %entry
|
|
; GFX1250-SPREFETCH-NEXT: flat_prefetch_b8 v[0:1] scope:SCOPE_SE
|
|
; GFX1250-SPREFETCH-NEXT: s_endpgm
|
|
;
|
|
; SAFE-CU-LABEL: prefetch_data_vgpr_flat_cu:
|
|
; SAFE-CU: ; %bb.0: ; %entry
|
|
; SAFE-CU-NEXT: flat_prefetch_b8 v[0:1]
|
|
; SAFE-CU-NEXT: s_endpgm
|
|
;
|
|
; NOSPREFETCH-LABEL: prefetch_data_vgpr_flat_cu:
|
|
; NOSPREFETCH: ; %bb.0: ; %entry
|
|
; NOSPREFETCH-NEXT: s_endpgm
|
|
;
|
|
; GFX12-SPREFETCH-LABEL: prefetch_data_vgpr_flat_cu:
|
|
; GFX12-SPREFETCH: ; %bb.0: ; %entry
|
|
; GFX12-SPREFETCH-NEXT: s_endpgm
|
|
entry:
|
|
tail call void @llvm.prefetch.pf(ptr %ptr, i32 0, i32 3, i32 1)
|
|
ret void
|
|
}
|
|
|
|
; flat offset
|
|
|
|
; Data prefetch of a non-`inreg` generic pointer advanced by 128 floats (512 bytes).
; GFX1250 and GFX1250-SPREFETCH expect the constant folded as offset:512 on flat_prefetch_b8.
define amdgpu_ps void @prefetch_data_vgpr_flat_offset(ptr %ptr) {
|
|
; GFX1250-LABEL: prefetch_data_vgpr_flat_offset:
|
|
; GFX1250: ; %bb.0: ; %entry
|
|
; GFX1250-NEXT: flat_prefetch_b8 v[0:1] offset:512 scope:SCOPE_SYS
|
|
; GFX1250-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-SPREFETCH-LABEL: prefetch_data_vgpr_flat_offset:
|
|
; GFX1250-SPREFETCH: ; %bb.0: ; %entry
|
|
; GFX1250-SPREFETCH-NEXT: flat_prefetch_b8 v[0:1] offset:512 scope:SCOPE_SYS
|
|
; GFX1250-SPREFETCH-NEXT: s_endpgm
|
|
;
|
|
; NOSPREFETCH-LABEL: prefetch_data_vgpr_flat_offset:
|
|
; NOSPREFETCH: ; %bb.0: ; %entry
|
|
; NOSPREFETCH-NEXT: s_endpgm
|
|
;
|
|
; GFX12-SPREFETCH-LABEL: prefetch_data_vgpr_flat_offset:
|
|
; GFX12-SPREFETCH: ; %bb.0: ; %entry
|
|
; GFX12-SPREFETCH-NEXT: s_endpgm
|
|
entry:
|
|
%gep = getelementptr float, ptr %ptr, i32 128
|
|
tail call void @llvm.prefetch.pf(ptr %gep, i32 0, i32 0, i32 1)
|
|
ret void
|
|
}
|
|
|
|
; Data prefetch of a non-`inreg` addrspace(1) pointer advanced by 128 floats (512 bytes).
; GFX1250 and GFX1250-SPREFETCH expect the constant folded as offset:512 on global_prefetch_b8.
define amdgpu_ps void @prefetch_data_vgpr_global_offset(ptr addrspace(1) %ptr) {
|
|
; GFX1250-LABEL: prefetch_data_vgpr_global_offset:
|
|
; GFX1250: ; %bb.0: ; %entry
|
|
; GFX1250-NEXT: global_prefetch_b8 v[0:1], off offset:512 scope:SCOPE_SYS
|
|
; GFX1250-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-SPREFETCH-LABEL: prefetch_data_vgpr_global_offset:
|
|
; GFX1250-SPREFETCH: ; %bb.0: ; %entry
|
|
; GFX1250-SPREFETCH-NEXT: global_prefetch_b8 v[0:1], off offset:512 scope:SCOPE_SYS
|
|
; GFX1250-SPREFETCH-NEXT: s_endpgm
|
|
;
|
|
; NOSPREFETCH-LABEL: prefetch_data_vgpr_global_offset:
|
|
; NOSPREFETCH: ; %bb.0: ; %entry
|
|
; NOSPREFETCH-NEXT: s_endpgm
|
|
;
|
|
; GFX12-SPREFETCH-LABEL: prefetch_data_vgpr_global_offset:
|
|
; GFX12-SPREFETCH: ; %bb.0: ; %entry
|
|
; GFX12-SPREFETCH-NEXT: s_endpgm
|
|
entry:
|
|
%gep = getelementptr float, ptr addrspace(1) %ptr, i32 128
|
|
tail call void @llvm.prefetch.p1(ptr addrspace(1) %gep, i32 0, i32 0, i32 1)
|
|
ret void
|
|
}
|
|
|
|
; Data prefetch of an `inreg` addrspace(1) base plus a non-`inreg` i32 byte offset.
; GFX1250 and GFX1250-SPREFETCH expect global_prefetch_b8 v0, s[0:1].
define amdgpu_ps void @prefetch_data_vgpr_global_saddr(ptr addrspace(1) inreg %ptr, i32 %voffset) {
|
|
; GFX1250-LABEL: prefetch_data_vgpr_global_saddr:
|
|
; GFX1250: ; %bb.0: ; %entry
|
|
; GFX1250-NEXT: global_prefetch_b8 v0, s[0:1] scope:SCOPE_SYS
|
|
; GFX1250-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-SPREFETCH-LABEL: prefetch_data_vgpr_global_saddr:
|
|
; GFX1250-SPREFETCH: ; %bb.0: ; %entry
|
|
; GFX1250-SPREFETCH-NEXT: global_prefetch_b8 v0, s[0:1] scope:SCOPE_SYS
|
|
; GFX1250-SPREFETCH-NEXT: s_endpgm
|
|
;
|
|
; NOSPREFETCH-LABEL: prefetch_data_vgpr_global_saddr:
|
|
; NOSPREFETCH: ; %bb.0: ; %entry
|
|
; NOSPREFETCH-NEXT: s_endpgm
|
|
;
|
|
; GFX12-SPREFETCH-LABEL: prefetch_data_vgpr_global_saddr:
|
|
; GFX12-SPREFETCH: ; %bb.0: ; %entry
|
|
; GFX12-SPREFETCH-NEXT: s_endpgm
|
|
entry:
|
|
%gep = getelementptr i8, ptr addrspace(1) %ptr, i32 %voffset
|
|
tail call void @llvm.prefetch.p1(ptr addrspace(1) %gep, i32 0, i32 0, i32 1)
|
|
ret void
|
|
}
|
|
|
|
; Data prefetch of an `inreg` addrspace(1) base plus a non-`inreg` i32 byte offset plus a
; constant 128 bytes. GFX1250 and GFX1250-SPREFETCH expect global_prefetch_b8 v0, s[0:1]
; with offset:128.
define amdgpu_ps void @prefetch_data_vgpr_global_saddr_offset(ptr addrspace(1) inreg %ptr, i32 %voffset) {
|
|
; GFX1250-LABEL: prefetch_data_vgpr_global_saddr_offset:
|
|
; GFX1250: ; %bb.0: ; %entry
|
|
; GFX1250-NEXT: global_prefetch_b8 v0, s[0:1] offset:128 scope:SCOPE_SYS
|
|
; GFX1250-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-SPREFETCH-LABEL: prefetch_data_vgpr_global_saddr_offset:
|
|
; GFX1250-SPREFETCH: ; %bb.0: ; %entry
|
|
; GFX1250-SPREFETCH-NEXT: global_prefetch_b8 v0, s[0:1] offset:128 scope:SCOPE_SYS
|
|
; GFX1250-SPREFETCH-NEXT: s_endpgm
|
|
;
|
|
; NOSPREFETCH-LABEL: prefetch_data_vgpr_global_saddr_offset:
|
|
; NOSPREFETCH: ; %bb.0: ; %entry
|
|
; NOSPREFETCH-NEXT: s_endpgm
|
|
;
|
|
; GFX12-SPREFETCH-LABEL: prefetch_data_vgpr_global_saddr_offset:
|
|
; GFX12-SPREFETCH: ; %bb.0: ; %entry
|
|
; GFX12-SPREFETCH-NEXT: s_endpgm
|
|
entry:
|
|
%gep1 = getelementptr i8, ptr addrspace(1) %ptr, i32 %voffset
|
|
%gep2 = getelementptr i8, ptr addrspace(1) %gep1, i32 128
|
|
tail call void @llvm.prefetch.p1(ptr addrspace(1) %gep2, i32 0, i32 0, i32 1)
|
|
ret void
|
|
}
|
|
|
|
; Cannot prefetch I$ with flat or global instructions.
|
|
|
|
; Instruction-cache prefetch (last intrinsic argument = 0) of a non-`inreg` addrspace(1)
; pointer: every RUN configuration (common GCN prefix) is expected to emit only s_endpgm.
define amdgpu_ps void @prefetch_inst_vgpr_global(ptr addrspace(1) %ptr) {
|
|
; GCN-LABEL: prefetch_inst_vgpr_global:
|
|
; GCN: ; %bb.0: ; %entry
|
|
; GCN-NEXT: s_endpgm
|
|
entry:
|
|
tail call void @llvm.prefetch.p1(ptr addrspace(1) %ptr, i32 0, i32 0, i32 0)
|
|
ret void
|
|
}
|
|
|
|
; Instruction-cache prefetch (last intrinsic argument = 0) of a non-`inreg` generic
; pointer: every RUN configuration (common GCN prefix) is expected to emit only s_endpgm.
define amdgpu_ps void @prefetch_inst_vgpr_flat(ptr %ptr) {
|
|
; GCN-LABEL: prefetch_inst_vgpr_flat:
|
|
; GCN: ; %bb.0: ; %entry
|
|
; GCN-NEXT: s_endpgm
|
|
entry:
|
|
tail call void @llvm.prefetch.pf(ptr %ptr, i32 0, i32 0, i32 0)
|
|
ret void
|
|
}
|
|
|
|
; Force vector prefetch for uniform address with rw = 1 argument.
|
|
|
|
; Data prefetch of an `inreg` generic pointer with the second intrinsic argument (rw) = 1.
; Both GFX1250 and GFX1250-SPREFETCH expect the vector form (flat_prefetch_b8), whereas
; GFX12-SPREFETCH still expects s_prefetch_data.
define amdgpu_ps void @prefetch_data_sgpr_flat_force_vector(ptr inreg %ptr) {
|
|
; GFX1250-LABEL: prefetch_data_sgpr_flat_force_vector:
|
|
; GFX1250: ; %bb.0: ; %entry
|
|
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
|
|
; GFX1250-NEXT: flat_prefetch_b8 v0, s[0:1] scope:SCOPE_SYS
|
|
; GFX1250-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-SPREFETCH-LABEL: prefetch_data_sgpr_flat_force_vector:
|
|
; GFX1250-SPREFETCH: ; %bb.0: ; %entry
|
|
; GFX1250-SPREFETCH-NEXT: v_mov_b32_e32 v0, 0
|
|
; GFX1250-SPREFETCH-NEXT: flat_prefetch_b8 v0, s[0:1] scope:SCOPE_SYS
|
|
; GFX1250-SPREFETCH-NEXT: s_endpgm
|
|
;
|
|
; NOSPREFETCH-LABEL: prefetch_data_sgpr_flat_force_vector:
|
|
; NOSPREFETCH: ; %bb.0: ; %entry
|
|
; NOSPREFETCH-NEXT: s_endpgm
|
|
;
|
|
; GFX12-SPREFETCH-LABEL: prefetch_data_sgpr_flat_force_vector:
|
|
; GFX12-SPREFETCH: ; %bb.0: ; %entry
|
|
; GFX12-SPREFETCH-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
|
|
; GFX12-SPREFETCH-NEXT: s_endpgm
|
|
entry:
|
|
tail call void @llvm.prefetch.pf(ptr %ptr, i32 1, i32 0, i32 1)
|
|
ret void
|
|
}
|
|
|
|
; Data prefetch of an `inreg` addrspace(1) pointer with the second intrinsic argument (rw) = 1.
; Both GFX1250 and GFX1250-SPREFETCH expect the vector form (global_prefetch_b8), whereas
; GFX12-SPREFETCH still expects s_prefetch_data.
define amdgpu_ps void @prefetch_data_sgpr_global_force_vector(ptr addrspace(1) inreg %ptr) {
|
|
; GFX1250-LABEL: prefetch_data_sgpr_global_force_vector:
|
|
; GFX1250: ; %bb.0: ; %entry
|
|
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
|
|
; GFX1250-NEXT: global_prefetch_b8 v0, s[0:1] scope:SCOPE_SYS
|
|
; GFX1250-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-SPREFETCH-LABEL: prefetch_data_sgpr_global_force_vector:
|
|
; GFX1250-SPREFETCH: ; %bb.0: ; %entry
|
|
; GFX1250-SPREFETCH-NEXT: v_mov_b32_e32 v0, 0
|
|
; GFX1250-SPREFETCH-NEXT: global_prefetch_b8 v0, s[0:1] scope:SCOPE_SYS
|
|
; GFX1250-SPREFETCH-NEXT: s_endpgm
|
|
;
|
|
; NOSPREFETCH-LABEL: prefetch_data_sgpr_global_force_vector:
|
|
; NOSPREFETCH: ; %bb.0: ; %entry
|
|
; NOSPREFETCH-NEXT: s_endpgm
|
|
;
|
|
; GFX12-SPREFETCH-LABEL: prefetch_data_sgpr_global_force_vector:
|
|
; GFX12-SPREFETCH: ; %bb.0: ; %entry
|
|
; GFX12-SPREFETCH-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
|
|
; GFX12-SPREFETCH-NEXT: s_endpgm
|
|
entry:
|
|
tail call void @llvm.prefetch.p1(ptr addrspace(1) %ptr, i32 1, i32 0, i32 1)
|
|
ret void
|
|
}
|
|
|
|
; Same as prefetch_data_sgpr_global_force_vector (rw = 1) but with a 1024-byte constant
; offset; the CHECK lines expect it folded into the instruction (offset:1024 for the
; vector form, 0x400 for s_prefetch_data on GFX12-SPREFETCH).
define amdgpu_ps void @prefetch_data_sgpr_global_saddr_force_vector(ptr addrspace(1) inreg %ptr) {
|
|
; GFX1250-LABEL: prefetch_data_sgpr_global_saddr_force_vector:
|
|
; GFX1250: ; %bb.0: ; %entry
|
|
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
|
|
; GFX1250-NEXT: global_prefetch_b8 v0, s[0:1] offset:1024 scope:SCOPE_SYS
|
|
; GFX1250-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-SPREFETCH-LABEL: prefetch_data_sgpr_global_saddr_force_vector:
|
|
; GFX1250-SPREFETCH: ; %bb.0: ; %entry
|
|
; GFX1250-SPREFETCH-NEXT: v_mov_b32_e32 v0, 0
|
|
; GFX1250-SPREFETCH-NEXT: global_prefetch_b8 v0, s[0:1] offset:1024 scope:SCOPE_SYS
|
|
; GFX1250-SPREFETCH-NEXT: s_endpgm
|
|
;
|
|
; NOSPREFETCH-LABEL: prefetch_data_sgpr_global_saddr_force_vector:
|
|
; NOSPREFETCH: ; %bb.0: ; %entry
|
|
; NOSPREFETCH-NEXT: s_endpgm
|
|
;
|
|
; GFX12-SPREFETCH-LABEL: prefetch_data_sgpr_global_saddr_force_vector:
|
|
; GFX12-SPREFETCH: ; %bb.0: ; %entry
|
|
; GFX12-SPREFETCH-NEXT: s_prefetch_data s[0:1], 0x400, null, 0
|
|
; GFX12-SPREFETCH-NEXT: s_endpgm
|
|
entry:
|
|
%gep = getelementptr i8, ptr addrspace(1) %ptr, i32 1024
|
|
tail call void @llvm.prefetch.p1(ptr addrspace(1) %gep, i32 1, i32 0, i32 1)
|
|
ret void
|
|
}
|
|
|
|
; Declarations of the llvm.prefetch overloads called above, one per pointer address space.
; Per the LLVM Language Reference the i32 arguments are (rw, locality, cache type).
; NOTE(review): the generic-pointer overload is spelled `.pf` rather than `.p0`; this
; presumably relies on the IR reader remangling intrinsic names - confirm.
declare void @llvm.prefetch.pf(ptr nocapture readonly, i32, i32, i32)
|
|
declare void @llvm.prefetch.p1(ptr addrspace(1) nocapture readonly, i32, i32, i32)
|
|
declare void @llvm.prefetch.p3(ptr addrspace(3) nocapture readonly, i32, i32, i32)
|
|
declare void @llvm.prefetch.p4(ptr addrspace(4) nocapture readonly, i32, i32, i32)
|
|
declare void @llvm.prefetch.p5(ptr addrspace(5) nocapture readonly, i32, i32, i32)
|
|
declare void @llvm.prefetch.p6(ptr addrspace(6) nocapture readonly, i32, i32, i32)
|