llvm-project/llvm/test/CodeGen/AMDGPU/implicit-kernarg-backend-usage.ll
Alexander Timofeev fbdea5a2e9 [AMDGPU] Always select s_cselect_b32 for uniform 'select' SDNode
This patch contains the changes necessary to carry physical condition register (SCC) dependencies through the SDNode scheduler. It adds an edge to the SDNodeScheduler dependency graph instead of inserting an SCC copy between each definition and use. This approach lets the scheduler place instructions optimally, inserting the copy only when the dependency cannot otherwise be resolved.

Reviewed By: rampitec

Differential Revision: https://reviews.llvm.org/D133593
2022-09-15 22:03:56 +02:00

536 lines
20 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=3 < %s | FileCheck --check-prefix=GFX8V3 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefix=GFX8V4 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=5 < %s | FileCheck --check-prefix=GFX8V5 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 --amdhsa-code-object-version=3 < %s | FileCheck --check-prefixes=GFX9V3 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=GFX9V4 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 --amdhsa-code-object-version=5 < %s | FileCheck --check-prefixes=GFX9V5 %s
; Kernel under test: casts a private (addrspace 5) pointer and a local
; (addrspace 3) pointer to flat pointers, then does a volatile store of 1
; through the former and 2 through the latter.
;
; What the expected output below shows:
;  * gfx803 configurations issue a second s_load_dwordx2 in addition to the
;    kernel-argument load: from s[4:5] at offset 0x40 for code object v3/v4,
;    and from s[4:5] at offset 0xc8 for code object v5 (where the kernel
;    arguments themselves are also read from s[4:5]).
;  * gfx906 configurations issue no such load and instead read
;    HW_REG_SH_MEM_BASES with s_getreg_b32, shifting the result left by 16.
;  * Every configuration compares each incoming 32-bit pointer against -1 and
;    uses s_cselect_b32 to pick either the computed value or 0.
; NOTE(review): the extra loaded values are presumably the private/shared
; aperture bases taken from the queue (v3/v4) or from the implicit kernel
; arguments (v5) - confirm against the AMDGPU ABI documentation.
define amdgpu_kernel void @addrspacecast(i32 addrspace(5)* %ptr.private, i32 addrspace(3)* %ptr.local) {
; GFX8V3-LABEL: addrspacecast:
; GFX8V3: ; %bb.0:
; GFX8V3-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
; GFX8V3-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x40
; GFX8V3-NEXT: v_mov_b32_e32 v4, 1
; GFX8V3-NEXT: s_waitcnt lgkmcnt(0)
; GFX8V3-NEXT: s_cmp_lg_u32 s0, -1
; GFX8V3-NEXT: s_cselect_b32 s3, s3, 0
; GFX8V3-NEXT: s_cselect_b32 s0, s0, 0
; GFX8V3-NEXT: s_cmp_lg_u32 s1, -1
; GFX8V3-NEXT: v_mov_b32_e32 v0, s0
; GFX8V3-NEXT: v_mov_b32_e32 v1, s3
; GFX8V3-NEXT: s_cselect_b32 s0, s2, 0
; GFX8V3-NEXT: s_cselect_b32 s1, s1, 0
; GFX8V3-NEXT: v_mov_b32_e32 v2, s1
; GFX8V3-NEXT: v_mov_b32_e32 v3, s0
; GFX8V3-NEXT: flat_store_dword v[0:1], v4
; GFX8V3-NEXT: s_waitcnt vmcnt(0)
; GFX8V3-NEXT: v_mov_b32_e32 v0, 2
; GFX8V3-NEXT: flat_store_dword v[2:3], v0
; GFX8V3-NEXT: s_waitcnt vmcnt(0)
; GFX8V3-NEXT: s_endpgm
;
; GFX8V4-LABEL: addrspacecast:
; GFX8V4: ; %bb.0:
; GFX8V4-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
; GFX8V4-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x40
; GFX8V4-NEXT: v_mov_b32_e32 v4, 1
; GFX8V4-NEXT: s_waitcnt lgkmcnt(0)
; GFX8V4-NEXT: s_cmp_lg_u32 s0, -1
; GFX8V4-NEXT: s_cselect_b32 s3, s3, 0
; GFX8V4-NEXT: s_cselect_b32 s0, s0, 0
; GFX8V4-NEXT: s_cmp_lg_u32 s1, -1
; GFX8V4-NEXT: v_mov_b32_e32 v0, s0
; GFX8V4-NEXT: v_mov_b32_e32 v1, s3
; GFX8V4-NEXT: s_cselect_b32 s0, s2, 0
; GFX8V4-NEXT: s_cselect_b32 s1, s1, 0
; GFX8V4-NEXT: v_mov_b32_e32 v2, s1
; GFX8V4-NEXT: v_mov_b32_e32 v3, s0
; GFX8V4-NEXT: flat_store_dword v[0:1], v4
; GFX8V4-NEXT: s_waitcnt vmcnt(0)
; GFX8V4-NEXT: v_mov_b32_e32 v0, 2
; GFX8V4-NEXT: flat_store_dword v[2:3], v0
; GFX8V4-NEXT: s_waitcnt vmcnt(0)
; GFX8V4-NEXT: s_endpgm
;
; GFX8V5-LABEL: addrspacecast:
; GFX8V5: ; %bb.0:
; GFX8V5-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX8V5-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0xc8
; GFX8V5-NEXT: v_mov_b32_e32 v4, 1
; GFX8V5-NEXT: s_waitcnt lgkmcnt(0)
; GFX8V5-NEXT: s_cmp_lg_u32 s0, -1
; GFX8V5-NEXT: s_cselect_b32 s2, s2, 0
; GFX8V5-NEXT: s_cselect_b32 s0, s0, 0
; GFX8V5-NEXT: s_cmp_lg_u32 s1, -1
; GFX8V5-NEXT: v_mov_b32_e32 v0, s0
; GFX8V5-NEXT: v_mov_b32_e32 v1, s2
; GFX8V5-NEXT: s_cselect_b32 s0, s3, 0
; GFX8V5-NEXT: s_cselect_b32 s1, s1, 0
; GFX8V5-NEXT: v_mov_b32_e32 v2, s1
; GFX8V5-NEXT: v_mov_b32_e32 v3, s0
; GFX8V5-NEXT: flat_store_dword v[0:1], v4
; GFX8V5-NEXT: s_waitcnt vmcnt(0)
; GFX8V5-NEXT: v_mov_b32_e32 v0, 2
; GFX8V5-NEXT: flat_store_dword v[2:3], v0
; GFX8V5-NEXT: s_waitcnt vmcnt(0)
; GFX8V5-NEXT: s_endpgm
;
; GFX9V3-LABEL: addrspacecast:
; GFX9V3: ; %bb.0:
; GFX9V3-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX9V3-NEXT: s_getreg_b32 s2, hwreg(HW_REG_SH_MEM_BASES, 0, 16)
; GFX9V3-NEXT: s_lshl_b32 s2, s2, 16
; GFX9V3-NEXT: v_mov_b32_e32 v4, 1
; GFX9V3-NEXT: s_waitcnt lgkmcnt(0)
; GFX9V3-NEXT: s_cmp_lg_u32 s0, -1
; GFX9V3-NEXT: s_cselect_b32 s0, s0, 0
; GFX9V3-NEXT: v_mov_b32_e32 v0, s0
; GFX9V3-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 16, 16)
; GFX9V3-NEXT: s_cselect_b32 s2, s2, 0
; GFX9V3-NEXT: s_lshl_b32 s0, s0, 16
; GFX9V3-NEXT: s_cmp_lg_u32 s1, -1
; GFX9V3-NEXT: v_mov_b32_e32 v1, s2
; GFX9V3-NEXT: s_cselect_b32 s0, s0, 0
; GFX9V3-NEXT: s_cselect_b32 s1, s1, 0
; GFX9V3-NEXT: v_mov_b32_e32 v2, s1
; GFX9V3-NEXT: v_mov_b32_e32 v3, s0
; GFX9V3-NEXT: flat_store_dword v[0:1], v4
; GFX9V3-NEXT: s_waitcnt vmcnt(0)
; GFX9V3-NEXT: v_mov_b32_e32 v0, 2
; GFX9V3-NEXT: flat_store_dword v[2:3], v0
; GFX9V3-NEXT: s_waitcnt vmcnt(0)
; GFX9V3-NEXT: s_endpgm
;
; GFX9V4-LABEL: addrspacecast:
; GFX9V4: ; %bb.0:
; GFX9V4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX9V4-NEXT: s_getreg_b32 s2, hwreg(HW_REG_SH_MEM_BASES, 0, 16)
; GFX9V4-NEXT: s_lshl_b32 s2, s2, 16
; GFX9V4-NEXT: v_mov_b32_e32 v4, 1
; GFX9V4-NEXT: s_waitcnt lgkmcnt(0)
; GFX9V4-NEXT: s_cmp_lg_u32 s0, -1
; GFX9V4-NEXT: s_cselect_b32 s0, s0, 0
; GFX9V4-NEXT: v_mov_b32_e32 v0, s0
; GFX9V4-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 16, 16)
; GFX9V4-NEXT: s_cselect_b32 s2, s2, 0
; GFX9V4-NEXT: s_lshl_b32 s0, s0, 16
; GFX9V4-NEXT: s_cmp_lg_u32 s1, -1
; GFX9V4-NEXT: v_mov_b32_e32 v1, s2
; GFX9V4-NEXT: s_cselect_b32 s0, s0, 0
; GFX9V4-NEXT: s_cselect_b32 s1, s1, 0
; GFX9V4-NEXT: v_mov_b32_e32 v2, s1
; GFX9V4-NEXT: v_mov_b32_e32 v3, s0
; GFX9V4-NEXT: flat_store_dword v[0:1], v4
; GFX9V4-NEXT: s_waitcnt vmcnt(0)
; GFX9V4-NEXT: v_mov_b32_e32 v0, 2
; GFX9V4-NEXT: flat_store_dword v[2:3], v0
; GFX9V4-NEXT: s_waitcnt vmcnt(0)
; GFX9V4-NEXT: s_endpgm
;
; GFX9V5-LABEL: addrspacecast:
; GFX9V5: ; %bb.0:
; GFX9V5-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX9V5-NEXT: s_getreg_b32 s2, hwreg(HW_REG_SH_MEM_BASES, 0, 16)
; GFX9V5-NEXT: s_lshl_b32 s2, s2, 16
; GFX9V5-NEXT: v_mov_b32_e32 v4, 1
; GFX9V5-NEXT: s_waitcnt lgkmcnt(0)
; GFX9V5-NEXT: s_cmp_lg_u32 s0, -1
; GFX9V5-NEXT: s_cselect_b32 s0, s0, 0
; GFX9V5-NEXT: v_mov_b32_e32 v0, s0
; GFX9V5-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 16, 16)
; GFX9V5-NEXT: s_cselect_b32 s2, s2, 0
; GFX9V5-NEXT: s_lshl_b32 s0, s0, 16
; GFX9V5-NEXT: s_cmp_lg_u32 s1, -1
; GFX9V5-NEXT: v_mov_b32_e32 v1, s2
; GFX9V5-NEXT: s_cselect_b32 s0, s0, 0
; GFX9V5-NEXT: s_cselect_b32 s1, s1, 0
; GFX9V5-NEXT: v_mov_b32_e32 v2, s1
; GFX9V5-NEXT: v_mov_b32_e32 v3, s0
; GFX9V5-NEXT: flat_store_dword v[0:1], v4
; GFX9V5-NEXT: s_waitcnt vmcnt(0)
; GFX9V5-NEXT: v_mov_b32_e32 v0, 2
; GFX9V5-NEXT: flat_store_dword v[2:3], v0
; GFX9V5-NEXT: s_waitcnt vmcnt(0)
; GFX9V5-NEXT: s_endpgm
%flat.private = addrspacecast i32 addrspace(5)* %ptr.private to i32*
%flat.local = addrspacecast i32 addrspace(3)* %ptr.local to i32*
store volatile i32 1, i32* %flat.private
store volatile i32 2, i32* %flat.local
ret void
}
; Kernel under test: calls llvm.amdgcn.is.shared on the flat pointer argument,
; zero-extends the i1 result to i32 and stores it (volatile) to an undef
; global (addrspace 1) address.
;
; What the expected output below shows:
;  * gfx803 configurations load one dword from s[4:5] at offset 0x40 (code
;    object v3/v4) or 0xcc (code object v5) and compare it for equality with
;    the dword loaded from offset 0x4 of the kernel arguments.
;  * gfx906 configurations compare that same kernel-argument dword against
;    bits (16, 16) of HW_REG_SH_MEM_BASES shifted left by 16; no extra load.
; NOTE(review): the dword at offset 0x4 is presumably the high half of %ptr
; and the other operand the shared aperture base - confirm.
define amdgpu_kernel void @llvm_amdgcn_is_shared(i8* %ptr) {
; GFX8V3-LABEL: llvm_amdgcn_is_shared:
; GFX8V3: ; %bb.0:
; GFX8V3-NEXT: s_load_dword s0, s[4:5], 0x40
; GFX8V3-NEXT: s_load_dword s1, s[6:7], 0x4
; GFX8V3-NEXT: s_waitcnt lgkmcnt(0)
; GFX8V3-NEXT: s_cmp_eq_u32 s1, s0
; GFX8V3-NEXT: s_cselect_b64 s[0:1], -1, 0
; GFX8V3-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX8V3-NEXT: flat_store_dword v[0:1], v0
; GFX8V3-NEXT: s_waitcnt vmcnt(0)
; GFX8V3-NEXT: s_endpgm
;
; GFX8V4-LABEL: llvm_amdgcn_is_shared:
; GFX8V4: ; %bb.0:
; GFX8V4-NEXT: s_load_dword s0, s[4:5], 0x40
; GFX8V4-NEXT: s_load_dword s1, s[6:7], 0x4
; GFX8V4-NEXT: s_waitcnt lgkmcnt(0)
; GFX8V4-NEXT: s_cmp_eq_u32 s1, s0
; GFX8V4-NEXT: s_cselect_b64 s[0:1], -1, 0
; GFX8V4-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX8V4-NEXT: flat_store_dword v[0:1], v0
; GFX8V4-NEXT: s_waitcnt vmcnt(0)
; GFX8V4-NEXT: s_endpgm
;
; GFX8V5-LABEL: llvm_amdgcn_is_shared:
; GFX8V5: ; %bb.0:
; GFX8V5-NEXT: s_load_dword s0, s[4:5], 0xcc
; GFX8V5-NEXT: s_load_dword s1, s[4:5], 0x4
; GFX8V5-NEXT: s_waitcnt lgkmcnt(0)
; GFX8V5-NEXT: s_cmp_eq_u32 s1, s0
; GFX8V5-NEXT: s_cselect_b64 s[0:1], -1, 0
; GFX8V5-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX8V5-NEXT: flat_store_dword v[0:1], v0
; GFX8V5-NEXT: s_waitcnt vmcnt(0)
; GFX8V5-NEXT: s_endpgm
;
; GFX9V3-LABEL: llvm_amdgcn_is_shared:
; GFX9V3: ; %bb.0:
; GFX9V3-NEXT: s_load_dword s0, s[4:5], 0x4
; GFX9V3-NEXT: s_getreg_b32 s1, hwreg(HW_REG_SH_MEM_BASES, 16, 16)
; GFX9V3-NEXT: s_lshl_b32 s1, s1, 16
; GFX9V3-NEXT: s_waitcnt lgkmcnt(0)
; GFX9V3-NEXT: s_cmp_eq_u32 s0, s1
; GFX9V3-NEXT: s_cselect_b64 s[0:1], -1, 0
; GFX9V3-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX9V3-NEXT: global_store_dword v[0:1], v0, off
; GFX9V3-NEXT: s_waitcnt vmcnt(0)
; GFX9V3-NEXT: s_endpgm
;
; GFX9V4-LABEL: llvm_amdgcn_is_shared:
; GFX9V4: ; %bb.0:
; GFX9V4-NEXT: s_load_dword s0, s[4:5], 0x4
; GFX9V4-NEXT: s_getreg_b32 s1, hwreg(HW_REG_SH_MEM_BASES, 16, 16)
; GFX9V4-NEXT: s_lshl_b32 s1, s1, 16
; GFX9V4-NEXT: s_waitcnt lgkmcnt(0)
; GFX9V4-NEXT: s_cmp_eq_u32 s0, s1
; GFX9V4-NEXT: s_cselect_b64 s[0:1], -1, 0
; GFX9V4-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX9V4-NEXT: global_store_dword v[0:1], v0, off
; GFX9V4-NEXT: s_waitcnt vmcnt(0)
; GFX9V4-NEXT: s_endpgm
;
; GFX9V5-LABEL: llvm_amdgcn_is_shared:
; GFX9V5: ; %bb.0:
; GFX9V5-NEXT: s_load_dword s0, s[4:5], 0x4
; GFX9V5-NEXT: s_getreg_b32 s1, hwreg(HW_REG_SH_MEM_BASES, 16, 16)
; GFX9V5-NEXT: s_lshl_b32 s1, s1, 16
; GFX9V5-NEXT: s_waitcnt lgkmcnt(0)
; GFX9V5-NEXT: s_cmp_eq_u32 s0, s1
; GFX9V5-NEXT: s_cselect_b64 s[0:1], -1, 0
; GFX9V5-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX9V5-NEXT: global_store_dword v[0:1], v0, off
; GFX9V5-NEXT: s_waitcnt vmcnt(0)
; GFX9V5-NEXT: s_endpgm
%is.shared = call i1 @llvm.amdgcn.is.shared(i8* %ptr)
%zext = zext i1 %is.shared to i32
store volatile i32 %zext, i32 addrspace(1)* undef
ret void
}
; Kernel under test: calls llvm.amdgcn.is.private on the flat pointer
; argument, zero-extends the i1 result to i32 and stores it (volatile) to an
; undef global (addrspace 1) address.
;
; What the expected output below shows:
;  * gfx803 configurations load one dword from s[4:5] at offset 0x44 (code
;    object v3/v4) or 0xc8 (code object v5) and compare it for equality with
;    the dword loaded from offset 0x4 of the kernel arguments.
;  * gfx906 configurations compare that same kernel-argument dword against
;    bits (0, 16) of HW_REG_SH_MEM_BASES shifted left by 16; no extra load.
; NOTE(review): the dword at offset 0x4 is presumably the high half of %ptr
; and the other operand the private aperture base - confirm.
define amdgpu_kernel void @llvm_amdgcn_is_private(i8* %ptr) {
; GFX8V3-LABEL: llvm_amdgcn_is_private:
; GFX8V3: ; %bb.0:
; GFX8V3-NEXT: s_load_dword s0, s[4:5], 0x44
; GFX8V3-NEXT: s_load_dword s1, s[6:7], 0x4
; GFX8V3-NEXT: s_waitcnt lgkmcnt(0)
; GFX8V3-NEXT: s_cmp_eq_u32 s1, s0
; GFX8V3-NEXT: s_cselect_b64 s[0:1], -1, 0
; GFX8V3-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX8V3-NEXT: flat_store_dword v[0:1], v0
; GFX8V3-NEXT: s_waitcnt vmcnt(0)
; GFX8V3-NEXT: s_endpgm
;
; GFX8V4-LABEL: llvm_amdgcn_is_private:
; GFX8V4: ; %bb.0:
; GFX8V4-NEXT: s_load_dword s0, s[4:5], 0x44
; GFX8V4-NEXT: s_load_dword s1, s[6:7], 0x4
; GFX8V4-NEXT: s_waitcnt lgkmcnt(0)
; GFX8V4-NEXT: s_cmp_eq_u32 s1, s0
; GFX8V4-NEXT: s_cselect_b64 s[0:1], -1, 0
; GFX8V4-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX8V4-NEXT: flat_store_dword v[0:1], v0
; GFX8V4-NEXT: s_waitcnt vmcnt(0)
; GFX8V4-NEXT: s_endpgm
;
; GFX8V5-LABEL: llvm_amdgcn_is_private:
; GFX8V5: ; %bb.0:
; GFX8V5-NEXT: s_load_dword s0, s[4:5], 0xc8
; GFX8V5-NEXT: s_load_dword s1, s[4:5], 0x4
; GFX8V5-NEXT: s_waitcnt lgkmcnt(0)
; GFX8V5-NEXT: s_cmp_eq_u32 s1, s0
; GFX8V5-NEXT: s_cselect_b64 s[0:1], -1, 0
; GFX8V5-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX8V5-NEXT: flat_store_dword v[0:1], v0
; GFX8V5-NEXT: s_waitcnt vmcnt(0)
; GFX8V5-NEXT: s_endpgm
;
; GFX9V3-LABEL: llvm_amdgcn_is_private:
; GFX9V3: ; %bb.0:
; GFX9V3-NEXT: s_load_dword s0, s[4:5], 0x4
; GFX9V3-NEXT: s_getreg_b32 s1, hwreg(HW_REG_SH_MEM_BASES, 0, 16)
; GFX9V3-NEXT: s_lshl_b32 s1, s1, 16
; GFX9V3-NEXT: s_waitcnt lgkmcnt(0)
; GFX9V3-NEXT: s_cmp_eq_u32 s0, s1
; GFX9V3-NEXT: s_cselect_b64 s[0:1], -1, 0
; GFX9V3-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX9V3-NEXT: global_store_dword v[0:1], v0, off
; GFX9V3-NEXT: s_waitcnt vmcnt(0)
; GFX9V3-NEXT: s_endpgm
;
; GFX9V4-LABEL: llvm_amdgcn_is_private:
; GFX9V4: ; %bb.0:
; GFX9V4-NEXT: s_load_dword s0, s[4:5], 0x4
; GFX9V4-NEXT: s_getreg_b32 s1, hwreg(HW_REG_SH_MEM_BASES, 0, 16)
; GFX9V4-NEXT: s_lshl_b32 s1, s1, 16
; GFX9V4-NEXT: s_waitcnt lgkmcnt(0)
; GFX9V4-NEXT: s_cmp_eq_u32 s0, s1
; GFX9V4-NEXT: s_cselect_b64 s[0:1], -1, 0
; GFX9V4-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX9V4-NEXT: global_store_dword v[0:1], v0, off
; GFX9V4-NEXT: s_waitcnt vmcnt(0)
; GFX9V4-NEXT: s_endpgm
;
; GFX9V5-LABEL: llvm_amdgcn_is_private:
; GFX9V5: ; %bb.0:
; GFX9V5-NEXT: s_load_dword s0, s[4:5], 0x4
; GFX9V5-NEXT: s_getreg_b32 s1, hwreg(HW_REG_SH_MEM_BASES, 0, 16)
; GFX9V5-NEXT: s_lshl_b32 s1, s1, 16
; GFX9V5-NEXT: s_waitcnt lgkmcnt(0)
; GFX9V5-NEXT: s_cmp_eq_u32 s0, s1
; GFX9V5-NEXT: s_cselect_b64 s[0:1], -1, 0
; GFX9V5-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX9V5-NEXT: global_store_dword v[0:1], v0, off
; GFX9V5-NEXT: s_waitcnt vmcnt(0)
; GFX9V5-NEXT: s_endpgm
%is.private = call i1 @llvm.amdgcn.is.private(i8* %ptr)
%zext = zext i1 %is.private to i32
store volatile i32 %zext, i32 addrspace(1)* undef
ret void
}
; Kernel under test: calls llvm.trap and is then unreachable.
;
; Every configuration is expected to end in "s_trap 2"; what differs is how
; s[0:1] is prepared beforehand:
;  * gfx803 with code object v3/v4, and gfx906 with code object v3, copy
;    s[4:5] into s[0:1].
;  * gfx803 with code object v5 loads s[0:1] from s[4:5] at offset 0xc8 and
;    waits for the load.
;  * gfx906 with code object v4/v5 emits nothing before the trap.
; NOTE(review): s[0:1] presumably carries the queue pointer for the trap
; handler - confirm against the AMDGPU trap handler ABI.
define amdgpu_kernel void @llvm_trap() {
; GFX8V3-LABEL: llvm_trap:
; GFX8V3: ; %bb.0:
; GFX8V3-NEXT: s_mov_b64 s[0:1], s[4:5]
; GFX8V3-NEXT: s_trap 2
;
; GFX8V4-LABEL: llvm_trap:
; GFX8V4: ; %bb.0:
; GFX8V4-NEXT: s_mov_b64 s[0:1], s[4:5]
; GFX8V4-NEXT: s_trap 2
;
; GFX8V5-LABEL: llvm_trap:
; GFX8V5: ; %bb.0:
; GFX8V5-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xc8
; GFX8V5-NEXT: s_waitcnt lgkmcnt(0)
; GFX8V5-NEXT: s_trap 2
;
; GFX9V3-LABEL: llvm_trap:
; GFX9V3: ; %bb.0:
; GFX9V3-NEXT: s_mov_b64 s[0:1], s[4:5]
; GFX9V3-NEXT: s_trap 2
;
; GFX9V4-LABEL: llvm_trap:
; GFX9V4: ; %bb.0:
; GFX9V4-NEXT: s_trap 2
;
; GFX9V5-LABEL: llvm_trap:
; GFX9V5: ; %bb.0:
; GFX9V5-NEXT: s_trap 2
call void @llvm.trap()
unreachable
}
; Kernel under test: calls llvm.debugtrap and is then unreachable.
;
; All six configurations expect a single "s_trap 3" with no register set-up
; and no loads before it.
define amdgpu_kernel void @llvm_debugtrap() {
; GFX8V3-LABEL: llvm_debugtrap:
; GFX8V3: ; %bb.0:
; GFX8V3-NEXT: s_trap 3
;
; GFX8V4-LABEL: llvm_debugtrap:
; GFX8V4: ; %bb.0:
; GFX8V4-NEXT: s_trap 3
;
; GFX8V5-LABEL: llvm_debugtrap:
; GFX8V5: ; %bb.0:
; GFX8V5-NEXT: s_trap 3
;
; GFX9V3-LABEL: llvm_debugtrap:
; GFX9V3: ; %bb.0:
; GFX9V3-NEXT: s_trap 3
;
; GFX9V4-LABEL: llvm_debugtrap:
; GFX9V4: ; %bb.0:
; GFX9V4-NEXT: s_trap 3
;
; GFX9V5-LABEL: llvm_debugtrap:
; GFX9V5: ; %bb.0:
; GFX9V5-NEXT: s_trap 3
call void @llvm.debugtrap()
unreachable
}
; Kernel under test: obtains the queue pointer, implicit-argument pointer,
; dispatch pointer and dispatch id through their intrinsics, does a volatile
; i8 load through each of the three pointers (in that order), and finally
; stores the 64-bit dispatch id (volatile) to the global pointer argument.
;
; What the expected output below shows:
;  * Code object v3/v4: the three byte loads use s[6:7], s[8:9]+8 and s[4:5]
;    as addresses; the store address is loaded from s[8:9]+0 and the stored
;    value comes from s[10:11].
;  * Code object v5: the register numbers shift down by two (s[6:7]+8,
;    s[4:5], store address from s[6:7]+0, value from s[8:9]), and the first
;    byte load uses v[0:1] (gfx803) or s[0:1] (gfx906) with no instruction in
;    the checked sequence that initialises them.
; NOTE(review): that last point presumably reflects the queue pointer not
; being passed in SGPRs under code object v5 - confirm before relying on it.
define amdgpu_kernel void @llvm_amdgcn_queue_ptr(i64 addrspace(1)* %ptr) {
; GFX8V3-LABEL: llvm_amdgcn_queue_ptr:
; GFX8V3: ; %bb.0:
; GFX8V3-NEXT: v_mov_b32_e32 v0, s6
; GFX8V3-NEXT: v_mov_b32_e32 v1, s7
; GFX8V3-NEXT: s_add_u32 s0, s8, 8
; GFX8V3-NEXT: flat_load_ubyte v0, v[0:1] glc
; GFX8V3-NEXT: s_addc_u32 s1, s9, 0
; GFX8V3-NEXT: s_waitcnt vmcnt(0)
; GFX8V3-NEXT: v_mov_b32_e32 v0, s0
; GFX8V3-NEXT: v_mov_b32_e32 v1, s1
; GFX8V3-NEXT: flat_load_ubyte v0, v[0:1] glc
; GFX8V3-NEXT: s_waitcnt vmcnt(0)
; GFX8V3-NEXT: v_mov_b32_e32 v0, s4
; GFX8V3-NEXT: v_mov_b32_e32 v1, s5
; GFX8V3-NEXT: flat_load_ubyte v0, v[0:1] glc
; GFX8V3-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX8V3-NEXT: v_mov_b32_e32 v2, s10
; GFX8V3-NEXT: v_mov_b32_e32 v3, s11
; GFX8V3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX8V3-NEXT: v_mov_b32_e32 v0, s0
; GFX8V3-NEXT: v_mov_b32_e32 v1, s1
; GFX8V3-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
; GFX8V3-NEXT: s_waitcnt vmcnt(0)
; GFX8V3-NEXT: s_endpgm
;
; GFX8V4-LABEL: llvm_amdgcn_queue_ptr:
; GFX8V4: ; %bb.0:
; GFX8V4-NEXT: v_mov_b32_e32 v0, s6
; GFX8V4-NEXT: v_mov_b32_e32 v1, s7
; GFX8V4-NEXT: s_add_u32 s0, s8, 8
; GFX8V4-NEXT: flat_load_ubyte v0, v[0:1] glc
; GFX8V4-NEXT: s_addc_u32 s1, s9, 0
; GFX8V4-NEXT: s_waitcnt vmcnt(0)
; GFX8V4-NEXT: v_mov_b32_e32 v0, s0
; GFX8V4-NEXT: v_mov_b32_e32 v1, s1
; GFX8V4-NEXT: flat_load_ubyte v0, v[0:1] glc
; GFX8V4-NEXT: s_waitcnt vmcnt(0)
; GFX8V4-NEXT: v_mov_b32_e32 v0, s4
; GFX8V4-NEXT: v_mov_b32_e32 v1, s5
; GFX8V4-NEXT: flat_load_ubyte v0, v[0:1] glc
; GFX8V4-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX8V4-NEXT: v_mov_b32_e32 v2, s10
; GFX8V4-NEXT: v_mov_b32_e32 v3, s11
; GFX8V4-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX8V4-NEXT: v_mov_b32_e32 v0, s0
; GFX8V4-NEXT: v_mov_b32_e32 v1, s1
; GFX8V4-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
; GFX8V4-NEXT: s_waitcnt vmcnt(0)
; GFX8V4-NEXT: s_endpgm
;
; GFX8V5-LABEL: llvm_amdgcn_queue_ptr:
; GFX8V5: ; %bb.0:
; GFX8V5-NEXT: s_add_u32 s0, s6, 8
; GFX8V5-NEXT: flat_load_ubyte v0, v[0:1] glc
; GFX8V5-NEXT: s_addc_u32 s1, s7, 0
; GFX8V5-NEXT: s_waitcnt vmcnt(0)
; GFX8V5-NEXT: v_mov_b32_e32 v0, s0
; GFX8V5-NEXT: v_mov_b32_e32 v1, s1
; GFX8V5-NEXT: flat_load_ubyte v0, v[0:1] glc
; GFX8V5-NEXT: s_waitcnt vmcnt(0)
; GFX8V5-NEXT: v_mov_b32_e32 v0, s4
; GFX8V5-NEXT: v_mov_b32_e32 v1, s5
; GFX8V5-NEXT: flat_load_ubyte v0, v[0:1] glc
; GFX8V5-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
; GFX8V5-NEXT: v_mov_b32_e32 v2, s8
; GFX8V5-NEXT: v_mov_b32_e32 v3, s9
; GFX8V5-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX8V5-NEXT: v_mov_b32_e32 v0, s0
; GFX8V5-NEXT: v_mov_b32_e32 v1, s1
; GFX8V5-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
; GFX8V5-NEXT: s_waitcnt vmcnt(0)
; GFX8V5-NEXT: s_endpgm
;
; GFX9V3-LABEL: llvm_amdgcn_queue_ptr:
; GFX9V3: ; %bb.0:
; GFX9V3-NEXT: v_mov_b32_e32 v2, 0
; GFX9V3-NEXT: global_load_ubyte v0, v2, s[6:7] glc
; GFX9V3-NEXT: s_waitcnt vmcnt(0)
; GFX9V3-NEXT: global_load_ubyte v0, v2, s[8:9] offset:8 glc
; GFX9V3-NEXT: s_waitcnt vmcnt(0)
; GFX9V3-NEXT: global_load_ubyte v0, v2, s[4:5] glc
; GFX9V3-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX9V3-NEXT: s_waitcnt vmcnt(0)
; GFX9V3-NEXT: v_mov_b32_e32 v0, s10
; GFX9V3-NEXT: v_mov_b32_e32 v1, s11
; GFX9V3-NEXT: ; kill: killed $sgpr6_sgpr7
; GFX9V3-NEXT: ; kill: killed $sgpr4_sgpr5
; GFX9V3-NEXT: s_waitcnt lgkmcnt(0)
; GFX9V3-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX9V3-NEXT: s_waitcnt vmcnt(0)
; GFX9V3-NEXT: s_endpgm
;
; GFX9V4-LABEL: llvm_amdgcn_queue_ptr:
; GFX9V4: ; %bb.0:
; GFX9V4-NEXT: v_mov_b32_e32 v2, 0
; GFX9V4-NEXT: global_load_ubyte v0, v2, s[6:7] glc
; GFX9V4-NEXT: s_waitcnt vmcnt(0)
; GFX9V4-NEXT: global_load_ubyte v0, v2, s[8:9] offset:8 glc
; GFX9V4-NEXT: s_waitcnt vmcnt(0)
; GFX9V4-NEXT: global_load_ubyte v0, v2, s[4:5] glc
; GFX9V4-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX9V4-NEXT: s_waitcnt vmcnt(0)
; GFX9V4-NEXT: v_mov_b32_e32 v0, s10
; GFX9V4-NEXT: v_mov_b32_e32 v1, s11
; GFX9V4-NEXT: ; kill: killed $sgpr6_sgpr7
; GFX9V4-NEXT: ; kill: killed $sgpr4_sgpr5
; GFX9V4-NEXT: s_waitcnt lgkmcnt(0)
; GFX9V4-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX9V4-NEXT: s_waitcnt vmcnt(0)
; GFX9V4-NEXT: s_endpgm
;
; GFX9V5-LABEL: llvm_amdgcn_queue_ptr:
; GFX9V5: ; %bb.0:
; GFX9V5-NEXT: v_mov_b32_e32 v2, 0
; GFX9V5-NEXT: global_load_ubyte v0, v2, s[0:1] glc
; GFX9V5-NEXT: s_waitcnt vmcnt(0)
; GFX9V5-NEXT: global_load_ubyte v0, v2, s[6:7] offset:8 glc
; GFX9V5-NEXT: s_waitcnt vmcnt(0)
; GFX9V5-NEXT: global_load_ubyte v0, v2, s[4:5] glc
; GFX9V5-NEXT: ; kill: killed $sgpr0_sgpr1
; GFX9V5-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
; GFX9V5-NEXT: s_waitcnt vmcnt(0)
; GFX9V5-NEXT: v_mov_b32_e32 v0, s8
; GFX9V5-NEXT: v_mov_b32_e32 v1, s9
; GFX9V5-NEXT: ; kill: killed $sgpr4_sgpr5
; GFX9V5-NEXT: s_waitcnt lgkmcnt(0)
; GFX9V5-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX9V5-NEXT: s_waitcnt vmcnt(0)
; GFX9V5-NEXT: s_endpgm
%queue.ptr = call i8 addrspace(4)* @llvm.amdgcn.queue.ptr()
%implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
%dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
%dispatch.id = call i64 @llvm.amdgcn.dispatch.id()
%queue.load = load volatile i8, i8 addrspace(4)* %queue.ptr
%implicitarg.load = load volatile i8, i8 addrspace(4)* %implicitarg.ptr
%dispatch.load = load volatile i8, i8 addrspace(4)* %dispatch.ptr
store volatile i64 %dispatch.id, i64 addrspace(1)* %ptr
ret void
}
; Declarations of the intrinsics called by the kernels in this file.
; The three pointer-returning intrinsics yield constant-address-space
; (addrspace 4) pointers; the two address-space predicates take a flat i8*.
declare noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr()
declare noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
declare i64 @llvm.amdgcn.dispatch.id()
declare noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
declare i1 @llvm.amdgcn.is.shared(i8*)
declare i1 @llvm.amdgcn.is.private(i8*)
declare void @llvm.trap()
declare void @llvm.debugtrap()