Cover all the missing cases and add very detailed tests for each rule. In summary: - Flat and Scratch (private), addrspace(0) and addrspace(5): loads are always divergent. - Global and Constant, addrspace(1) and addrspace(4): have real uniform loads (s_load), but require additional checks for alignment and flags in the MMO. For loads that are not naturally aligned or whose MMO is not uniform, do uniform-in-vgpr lowering. - Local (LDS), addrspace(3): only has instructions for divergent loads; for uniform loads do uniform-in-vgpr lowering. - Store rules are simplified using Ptr32 and Ptr64. All operands need to be vgpr. Some tests have code-size regressions since they use more sgpr instructions; these are marked with a FixMe comment to get back to later.
70 lines
2.8 KiB
LLVM
70 lines
2.8 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s --check-prefixes=ASM,DAGISEL-ASM
; RUN: llc -mtriple=amdgcn-amd-amdhsa -global-isel -new-reg-bank-select -mcpu=gfx900 < %s | FileCheck %s --check-prefixes=ASM,GISEL-ASM
|
|
|
|
; Cast an addrspace(3) pointer to a flat pointer through the
; llvm.amdgcn.addrspacecast.nonnull intrinsic and do a volatile store of 7
; through the result.
; Expected code: the incoming 32-bit value stays in v0 as the low half of the
; flat address, the high half is copied from the upper register of
; src_shared_base (s5 -> v1), and the store is a flat_store_dword. The checks
; contain no compare/select between the cast and the store.
define void @local_to_flat(ptr addrspace(3) %ptr) {
; ASM-LABEL: local_to_flat:
; ASM:       ; %bb.0:
; ASM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; ASM-NEXT:    s_mov_b64 s[4:5], src_shared_base
; ASM-NEXT:    v_mov_b32_e32 v1, s5
; ASM-NEXT:    v_mov_b32_e32 v2, 7
; ASM-NEXT:    flat_store_dword v[0:1], v2
; ASM-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; ASM-NEXT:    s_setpc_b64 s[30:31]
  %1 = call ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p3(ptr addrspace(3) %ptr)
  store volatile i32 7, ptr %1, align 4
  ret void
}
|
|
|
|
; Cast an addrspace(5) pointer to a flat pointer through the
; llvm.amdgcn.addrspacecast.nonnull intrinsic and do a volatile store of 7
; through the result.
; Expected code: the incoming 32-bit value stays in v0 as the low half of the
; flat address, the high half is copied from the upper register of
; src_private_base (s5 -> v1), and the store is a flat_store_dword. The checks
; contain no compare/select between the cast and the store.
define void @private_to_flat(ptr addrspace(5) %ptr) {
; ASM-LABEL: private_to_flat:
; ASM:       ; %bb.0:
; ASM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; ASM-NEXT:    s_mov_b64 s[4:5], src_private_base
; ASM-NEXT:    v_mov_b32_e32 v1, s5
; ASM-NEXT:    v_mov_b32_e32 v2, 7
; ASM-NEXT:    flat_store_dword v[0:1], v2
; ASM-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; ASM-NEXT:    s_setpc_b64 s[30:31]
  %1 = call ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p5(ptr addrspace(5) %ptr)
  store volatile i32 7, ptr %1, align 4
  ret void
}
|
|
|
|
; Cast a flat pointer to addrspace(3) through the
; llvm.amdgcn.addrspacecast.nonnull intrinsic and do a volatile store of 7
; through the result.
; Expected code: v0 is used directly as the ds_write_b32 address; the checks
; contain no compare/select between the cast and the store.
define void @flat_to_local(ptr %ptr) {
; ASM-LABEL: flat_to_local:
; ASM:       ; %bb.0:
; ASM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; ASM-NEXT:    v_mov_b32_e32 v1, 7
; ASM-NEXT:    ds_write_b32 v0, v1
; ASM-NEXT:    s_waitcnt lgkmcnt(0)
; ASM-NEXT:    s_setpc_b64 s[30:31]
  %1 = call ptr addrspace(3) @llvm.amdgcn.addrspacecast.nonnull.p3.p0(ptr %ptr)
  store volatile i32 7, ptr addrspace(3) %1, align 4
  ret void
}
|
|
|
|
; Cast a flat pointer to addrspace(5) through the
; llvm.amdgcn.addrspacecast.nonnull intrinsic and do a volatile store of 7
; through the result.
; Expected code: v0 is used directly as the offset operand of an "offen"
; buffer_store_dword; the checks contain no compare/select between the cast
; and the store.
define void @flat_to_private(ptr %ptr) {
; ASM-LABEL: flat_to_private:
; ASM:       ; %bb.0:
; ASM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; ASM-NEXT:    v_mov_b32_e32 v1, 7
; ASM-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
; ASM-NEXT:    s_waitcnt vmcnt(0)
; ASM-NEXT:    s_setpc_b64 s[30:31]
  %1 = call ptr addrspace(5) @llvm.amdgcn.addrspacecast.nonnull.p5.p0(ptr %ptr)
  store volatile i32 7, ptr addrspace(5) %1, align 4
  ret void
}
|
|
|
|
; Declarations of the llvm.amdgcn.addrspacecast.nonnull overloads.
; The scalar overloads are the ones called by the test functions in this file.
declare ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p3(ptr addrspace(3))
declare ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p5(ptr addrspace(5))
declare ptr addrspace(3) @llvm.amdgcn.addrspacecast.nonnull.p3.p0(ptr)
declare ptr addrspace(5) @llvm.amdgcn.addrspacecast.nonnull.p5.p0(ptr)

; NOTE(review): the vector overload below has no caller in the visible part of
; this file - confirm whether a vector test is expected to use it.
declare <4 x ptr> @llvm.amdgcn.addrspacecast.nonnull.v4p0.v4p3(<4 x ptr addrspace(3)>)
|
|
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; DAGISEL-ASM: {{.*}}
; GISEL-ASM: {{.*}}
|