
As suggested in the comment on commit
5ec884e5d8 (commitcomment-153707488),
this seems to fix the following tests when building with -DLLVM_ENABLE_EXPENSIVE_CHECKS=ON:
LLVM :: CodeGen/AMDGPU/materialize-frame-index-sgpr.gfx10.ll
LLVM :: CodeGen/AMDGPU/materialize-frame-index-sgpr.ll
LLVM :: CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg-crash.ll
> This PR fixes test failures introduced in #127353 when expensive checks
> are enabled.
>
> For `llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll` and
> `llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.gfx10.ll`, `s59`
> is no longer in live-ins because it is caller saved. Switch to `s55` in
> this PR.
1706 lines
78 KiB
LLVM
1706 lines
78 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
|
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck -check-prefix=GFX7 %s
|
|
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx810 -mattr=+xnack < %s | FileCheck -check-prefix=GFX8 %s
|
|
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+xnack < %s | FileCheck -check-prefixes=GFX900 %s
|
|
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -mattr=+xnack < %s | FileCheck -check-prefixes=GFX942 %s
|
|
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10_1 %s
|
|
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefix=GFX10_3 %s
|
|
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s
|
|
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
|
|
|
|
; Result aggregate for an inline asm that defines a batch of physical
; registers at once. The field order matches the output constraints of the
; "def" asm in @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs:
;   fields 0-5: s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58
;   fields 6-7: v[0:15], v[16:22]
;   field  8:   vcc (i64)
%asm.output = type { <16 x i32>, <16 x i32>, <16 x i32>, <8 x i32>, <2 x i32>, i32, ; sgprs
|
|
<16 x i32>, <7 x i32>, ; vgprs
|
|
i64 ; vcc
|
|
}
|
|
|
|
; Variant of the asm result aggregate whose second VGPR field is 5 lanes wide
; (<5 x i32>); the SGPR fields and the trailing i64 vcc field have the same
; layout as in %asm.output.
; NOTE(review): presumably paired with the "v[16:20]" asm operands that show
; up in the __lowest_offset function's checks -- confirm against its body.
%asm.output2 = type { <16 x i32>, <16 x i32>, <16 x i32>, <8 x i32>, <2 x i32>, i32, ; sgprs
|
|
<16 x i32>, <5 x i32>, ; vgprs
|
|
i64 ; vcc
|
|
}
|
|
|
|
; Variant of the asm result aggregate without the scalar i32 SGPR field: five
; SGPR vector fields, then a <16 x i32> and a <6 x i32> VGPR field, then an
; i64 for vcc.
; NOTE(review): the users of this type are not visible in this part of the
; file -- check the field order against their asm constraint strings.
%asm.output3 = type { <16 x i32>, <16 x i32>, <16 x i32>, <8 x i32>, <2 x i32>, ; sgprs
|
|
<16 x i32>, <6 x i32>, ; vgprs
|
|
i64 ; vcc
|
|
}
|
|
|
|
; %alloca1 should end up materializing with s_mov_b32, but scc is
|
|
; unavailable.
|
|
;
|
|
; This is primarily to test gfx7 and gfx8, which do not have vector
|
|
; add with no carry.
|
|
;
|
|
define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 {
|
|
; GFX7-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs:
|
|
; GFX7: ; %bb.0:
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1
|
|
; GFX7-NEXT: s_add_i32 s6, s32, 0x101100
|
|
; GFX7-NEXT: buffer_store_dword v23, off, s[0:3], s6 ; 4-byte Folded Spill
|
|
; GFX7-NEXT: s_mov_b64 exec, s[4:5]
|
|
; GFX7-NEXT: v_writelane_b32 v23, s30, 0
|
|
; GFX7-NEXT: v_writelane_b32 v23, s31, 1
|
|
; GFX7-NEXT: v_writelane_b32 v23, s33, 2
|
|
; GFX7-NEXT: v_writelane_b32 v23, s34, 3
|
|
; GFX7-NEXT: v_writelane_b32 v23, s35, 4
|
|
; GFX7-NEXT: v_writelane_b32 v23, s36, 5
|
|
; GFX7-NEXT: v_writelane_b32 v23, s37, 6
|
|
; GFX7-NEXT: v_writelane_b32 v23, s38, 7
|
|
; GFX7-NEXT: v_writelane_b32 v23, s39, 8
|
|
; GFX7-NEXT: v_writelane_b32 v23, s48, 9
|
|
; GFX7-NEXT: v_writelane_b32 v23, s49, 10
|
|
; GFX7-NEXT: v_writelane_b32 v23, s50, 11
|
|
; GFX7-NEXT: v_writelane_b32 v23, s51, 12
|
|
; GFX7-NEXT: v_writelane_b32 v23, s52, 13
|
|
; GFX7-NEXT: v_writelane_b32 v23, s53, 14
|
|
; GFX7-NEXT: v_lshr_b32_e64 v0, s32, 6
|
|
; GFX7-NEXT: v_writelane_b32 v23, s54, 15
|
|
; GFX7-NEXT: v_add_i32_e32 v0, vcc, 64, v0
|
|
; GFX7-NEXT: s_and_b64 s[4:5], 0, exec
|
|
; GFX7-NEXT: v_writelane_b32 v23, s55, 16
|
|
; GFX7-NEXT: ;;#ASMSTART
|
|
; GFX7-NEXT: ; use alloca0 v0
|
|
; GFX7-NEXT: ;;#ASMEND
|
|
; GFX7-NEXT: ;;#ASMSTART
|
|
; GFX7-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
|
|
; GFX7-NEXT: ;;#ASMEND
|
|
; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], s32
|
|
; GFX7-NEXT: v_mov_b32_e32 v0, 0x4040
|
|
; GFX7-NEXT: v_mad_u32_u24 v0, v0, 64, s32
|
|
; GFX7-NEXT: v_lshrrev_b32_e32 v0, 6, v0
|
|
; GFX7-NEXT: v_readfirstlane_b32 s54, v0
|
|
; GFX7-NEXT: buffer_load_dword v0, off, s[0:3], s32
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-NEXT: ;;#ASMSTART
|
|
; GFX7-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc
|
|
; GFX7-NEXT: ;;#ASMEND
|
|
; GFX7-NEXT: v_readlane_b32 s55, v23, 16
|
|
; GFX7-NEXT: v_readlane_b32 s54, v23, 15
|
|
; GFX7-NEXT: v_readlane_b32 s53, v23, 14
|
|
; GFX7-NEXT: v_readlane_b32 s52, v23, 13
|
|
; GFX7-NEXT: v_readlane_b32 s51, v23, 12
|
|
; GFX7-NEXT: v_readlane_b32 s50, v23, 11
|
|
; GFX7-NEXT: v_readlane_b32 s49, v23, 10
|
|
; GFX7-NEXT: v_readlane_b32 s48, v23, 9
|
|
; GFX7-NEXT: v_readlane_b32 s39, v23, 8
|
|
; GFX7-NEXT: v_readlane_b32 s38, v23, 7
|
|
; GFX7-NEXT: v_readlane_b32 s37, v23, 6
|
|
; GFX7-NEXT: v_readlane_b32 s36, v23, 5
|
|
; GFX7-NEXT: v_readlane_b32 s35, v23, 4
|
|
; GFX7-NEXT: v_readlane_b32 s34, v23, 3
|
|
; GFX7-NEXT: v_readlane_b32 s33, v23, 2
|
|
; GFX7-NEXT: v_readlane_b32 s31, v23, 1
|
|
; GFX7-NEXT: v_readlane_b32 s30, v23, 0
|
|
; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1
|
|
; GFX7-NEXT: s_add_i32 s6, s32, 0x101100
|
|
; GFX7-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload
|
|
; GFX7-NEXT: s_mov_b64 exec, s[4:5]
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
|
|
; GFX8-NEXT: s_add_i32 s6, s32, 0x101100
|
|
; GFX8-NEXT: buffer_store_dword v23, off, s[0:3], s6 ; 4-byte Folded Spill
|
|
; GFX8-NEXT: s_mov_b64 exec, s[4:5]
|
|
; GFX8-NEXT: v_writelane_b32 v23, s30, 0
|
|
; GFX8-NEXT: v_writelane_b32 v23, s31, 1
|
|
; GFX8-NEXT: v_writelane_b32 v23, s33, 2
|
|
; GFX8-NEXT: v_writelane_b32 v23, s34, 3
|
|
; GFX8-NEXT: v_writelane_b32 v23, s35, 4
|
|
; GFX8-NEXT: v_writelane_b32 v23, s36, 5
|
|
; GFX8-NEXT: v_writelane_b32 v23, s37, 6
|
|
; GFX8-NEXT: v_writelane_b32 v23, s38, 7
|
|
; GFX8-NEXT: v_writelane_b32 v23, s39, 8
|
|
; GFX8-NEXT: v_writelane_b32 v23, s48, 9
|
|
; GFX8-NEXT: v_writelane_b32 v23, s49, 10
|
|
; GFX8-NEXT: v_writelane_b32 v23, s50, 11
|
|
; GFX8-NEXT: v_writelane_b32 v23, s51, 12
|
|
; GFX8-NEXT: v_writelane_b32 v23, s52, 13
|
|
; GFX8-NEXT: v_writelane_b32 v23, s53, 14
|
|
; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32
|
|
; GFX8-NEXT: v_writelane_b32 v23, s54, 15
|
|
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0
|
|
; GFX8-NEXT: s_and_b64 s[4:5], 0, exec
|
|
; GFX8-NEXT: v_writelane_b32 v23, s55, 16
|
|
; GFX8-NEXT: ;;#ASMSTART
|
|
; GFX8-NEXT: ; use alloca0 v0
|
|
; GFX8-NEXT: ;;#ASMEND
|
|
; GFX8-NEXT: ;;#ASMSTART
|
|
; GFX8-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
|
|
; GFX8-NEXT: ;;#ASMEND
|
|
; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], s32
|
|
; GFX8-NEXT: v_mov_b32_e32 v0, 0x4040
|
|
; GFX8-NEXT: v_mad_u32_u24 v0, v0, 64, s32
|
|
; GFX8-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
|
|
; GFX8-NEXT: v_lshrrev_b32_e32 v0, 6, v0
|
|
; GFX8-NEXT: v_readfirstlane_b32 s54, v0
|
|
; GFX8-NEXT: buffer_load_dword v0, off, s[0:3], s32
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: ;;#ASMSTART
|
|
; GFX8-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc
|
|
; GFX8-NEXT: ;;#ASMEND
|
|
; GFX8-NEXT: v_readlane_b32 s55, v23, 16
|
|
; GFX8-NEXT: v_readlane_b32 s54, v23, 15
|
|
; GFX8-NEXT: v_readlane_b32 s53, v23, 14
|
|
; GFX8-NEXT: v_readlane_b32 s52, v23, 13
|
|
; GFX8-NEXT: v_readlane_b32 s51, v23, 12
|
|
; GFX8-NEXT: v_readlane_b32 s50, v23, 11
|
|
; GFX8-NEXT: v_readlane_b32 s49, v23, 10
|
|
; GFX8-NEXT: v_readlane_b32 s48, v23, 9
|
|
; GFX8-NEXT: v_readlane_b32 s39, v23, 8
|
|
; GFX8-NEXT: v_readlane_b32 s38, v23, 7
|
|
; GFX8-NEXT: v_readlane_b32 s37, v23, 6
|
|
; GFX8-NEXT: v_readlane_b32 s36, v23, 5
|
|
; GFX8-NEXT: v_readlane_b32 s35, v23, 4
|
|
; GFX8-NEXT: v_readlane_b32 s34, v23, 3
|
|
; GFX8-NEXT: v_readlane_b32 s33, v23, 2
|
|
; GFX8-NEXT: v_readlane_b32 s31, v23, 1
|
|
; GFX8-NEXT: v_readlane_b32 s30, v23, 0
|
|
; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
|
|
; GFX8-NEXT: s_add_i32 s6, s32, 0x101100
|
|
; GFX8-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload
|
|
; GFX8-NEXT: s_mov_b64 exec, s[4:5]
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX900-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs:
|
|
; GFX900: ; %bb.0:
|
|
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
|
|
; GFX900-NEXT: s_add_i32 s6, s32, 0x101100
|
|
; GFX900-NEXT: buffer_store_dword v23, off, s[0:3], s6 ; 4-byte Folded Spill
|
|
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
|
|
; GFX900-NEXT: v_writelane_b32 v23, s30, 0
|
|
; GFX900-NEXT: v_writelane_b32 v23, s31, 1
|
|
; GFX900-NEXT: v_writelane_b32 v23, s33, 2
|
|
; GFX900-NEXT: v_writelane_b32 v23, s34, 3
|
|
; GFX900-NEXT: v_writelane_b32 v23, s35, 4
|
|
; GFX900-NEXT: v_writelane_b32 v23, s36, 5
|
|
; GFX900-NEXT: v_writelane_b32 v23, s37, 6
|
|
; GFX900-NEXT: v_writelane_b32 v23, s38, 7
|
|
; GFX900-NEXT: v_writelane_b32 v23, s39, 8
|
|
; GFX900-NEXT: v_writelane_b32 v23, s48, 9
|
|
; GFX900-NEXT: v_writelane_b32 v23, s49, 10
|
|
; GFX900-NEXT: v_writelane_b32 v23, s50, 11
|
|
; GFX900-NEXT: v_writelane_b32 v23, s51, 12
|
|
; GFX900-NEXT: v_writelane_b32 v23, s52, 13
|
|
; GFX900-NEXT: v_writelane_b32 v23, s53, 14
|
|
; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32
|
|
; GFX900-NEXT: v_writelane_b32 v23, s54, 15
|
|
; GFX900-NEXT: v_add_u32_e32 v0, 64, v0
|
|
; GFX900-NEXT: s_and_b64 s[4:5], 0, exec
|
|
; GFX900-NEXT: v_writelane_b32 v23, s55, 16
|
|
; GFX900-NEXT: ;;#ASMSTART
|
|
; GFX900-NEXT: ; use alloca0 v0
|
|
; GFX900-NEXT: ;;#ASMEND
|
|
; GFX900-NEXT: ;;#ASMSTART
|
|
; GFX900-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
|
|
; GFX900-NEXT: ;;#ASMEND
|
|
; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32
|
|
; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32
|
|
; GFX900-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
|
|
; GFX900-NEXT: v_add_u32_e32 v0, 0x4040, v0
|
|
; GFX900-NEXT: v_readfirstlane_b32 s54, v0
|
|
; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32
|
|
; GFX900-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX900-NEXT: ;;#ASMSTART
|
|
; GFX900-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc
|
|
; GFX900-NEXT: ;;#ASMEND
|
|
; GFX900-NEXT: v_readlane_b32 s55, v23, 16
|
|
; GFX900-NEXT: v_readlane_b32 s54, v23, 15
|
|
; GFX900-NEXT: v_readlane_b32 s53, v23, 14
|
|
; GFX900-NEXT: v_readlane_b32 s52, v23, 13
|
|
; GFX900-NEXT: v_readlane_b32 s51, v23, 12
|
|
; GFX900-NEXT: v_readlane_b32 s50, v23, 11
|
|
; GFX900-NEXT: v_readlane_b32 s49, v23, 10
|
|
; GFX900-NEXT: v_readlane_b32 s48, v23, 9
|
|
; GFX900-NEXT: v_readlane_b32 s39, v23, 8
|
|
; GFX900-NEXT: v_readlane_b32 s38, v23, 7
|
|
; GFX900-NEXT: v_readlane_b32 s37, v23, 6
|
|
; GFX900-NEXT: v_readlane_b32 s36, v23, 5
|
|
; GFX900-NEXT: v_readlane_b32 s35, v23, 4
|
|
; GFX900-NEXT: v_readlane_b32 s34, v23, 3
|
|
; GFX900-NEXT: v_readlane_b32 s33, v23, 2
|
|
; GFX900-NEXT: v_readlane_b32 s31, v23, 1
|
|
; GFX900-NEXT: v_readlane_b32 s30, v23, 0
|
|
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
|
|
; GFX900-NEXT: s_add_i32 s6, s32, 0x101100
|
|
; GFX900-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload
|
|
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
|
|
; GFX900-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX900-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX942-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs:
|
|
; GFX942: ; %bb.0:
|
|
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
|
|
; GFX942-NEXT: s_add_i32 s2, s32, 0x4044
|
|
; GFX942-NEXT: scratch_store_dword off, v23, s2 ; 4-byte Folded Spill
|
|
; GFX942-NEXT: s_mov_b64 exec, s[0:1]
|
|
; GFX942-NEXT: v_writelane_b32 v23, s30, 0
|
|
; GFX942-NEXT: v_writelane_b32 v23, s31, 1
|
|
; GFX942-NEXT: v_writelane_b32 v23, s33, 2
|
|
; GFX942-NEXT: v_writelane_b32 v23, s34, 3
|
|
; GFX942-NEXT: v_writelane_b32 v23, s35, 4
|
|
; GFX942-NEXT: v_writelane_b32 v23, s36, 5
|
|
; GFX942-NEXT: v_writelane_b32 v23, s37, 6
|
|
; GFX942-NEXT: v_writelane_b32 v23, s38, 7
|
|
; GFX942-NEXT: v_writelane_b32 v23, s39, 8
|
|
; GFX942-NEXT: v_writelane_b32 v23, s48, 9
|
|
; GFX942-NEXT: v_writelane_b32 v23, s49, 10
|
|
; GFX942-NEXT: v_writelane_b32 v23, s50, 11
|
|
; GFX942-NEXT: v_writelane_b32 v23, s51, 12
|
|
; GFX942-NEXT: v_writelane_b32 v23, s52, 13
|
|
; GFX942-NEXT: v_writelane_b32 v23, s53, 14
|
|
; GFX942-NEXT: s_add_i32 s0, s32, 64
|
|
; GFX942-NEXT: v_writelane_b32 v23, s54, 15
|
|
; GFX942-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-NEXT: s_and_b64 s[60:61], 0, exec
|
|
; GFX942-NEXT: v_writelane_b32 v23, s55, 16
|
|
; GFX942-NEXT: ;;#ASMSTART
|
|
; GFX942-NEXT: ; use alloca0 v0
|
|
; GFX942-NEXT: ;;#ASMEND
|
|
; GFX942-NEXT: ;;#ASMSTART
|
|
; GFX942-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
|
|
; GFX942-NEXT: ;;#ASMEND
|
|
; GFX942-NEXT: s_addc_u32 s59, s32, 0x4040
|
|
; GFX942-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
|
|
; GFX942-NEXT: s_bitcmp1_b32 s59, 0
|
|
; GFX942-NEXT: s_bitset0_b32 s59, 0
|
|
; GFX942-NEXT: s_mov_b32 s54, s59
|
|
; GFX942-NEXT: ;;#ASMSTART
|
|
; GFX942-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc
|
|
; GFX942-NEXT: ;;#ASMEND
|
|
; GFX942-NEXT: v_readlane_b32 s55, v23, 16
|
|
; GFX942-NEXT: v_readlane_b32 s54, v23, 15
|
|
; GFX942-NEXT: v_readlane_b32 s53, v23, 14
|
|
; GFX942-NEXT: v_readlane_b32 s52, v23, 13
|
|
; GFX942-NEXT: v_readlane_b32 s51, v23, 12
|
|
; GFX942-NEXT: v_readlane_b32 s50, v23, 11
|
|
; GFX942-NEXT: v_readlane_b32 s49, v23, 10
|
|
; GFX942-NEXT: v_readlane_b32 s48, v23, 9
|
|
; GFX942-NEXT: v_readlane_b32 s39, v23, 8
|
|
; GFX942-NEXT: v_readlane_b32 s38, v23, 7
|
|
; GFX942-NEXT: v_readlane_b32 s37, v23, 6
|
|
; GFX942-NEXT: v_readlane_b32 s36, v23, 5
|
|
; GFX942-NEXT: v_readlane_b32 s35, v23, 4
|
|
; GFX942-NEXT: v_readlane_b32 s34, v23, 3
|
|
; GFX942-NEXT: v_readlane_b32 s33, v23, 2
|
|
; GFX942-NEXT: v_readlane_b32 s31, v23, 1
|
|
; GFX942-NEXT: v_readlane_b32 s30, v23, 0
|
|
; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
|
|
; GFX942-NEXT: s_add_i32 s2, s32, 0x4044
|
|
; GFX942-NEXT: scratch_load_dword v23, off, s2 ; 4-byte Folded Reload
|
|
; GFX942-NEXT: s_mov_b64 exec, s[0:1]
|
|
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX942-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10_1-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs:
|
|
; GFX10_1: ; %bb.0:
|
|
; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1
|
|
; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80880
|
|
; GFX10_1-NEXT: buffer_store_dword v23, off, s[0:3], s5 ; 4-byte Folded Spill
|
|
; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
|
|
; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
|
|
; GFX10_1-NEXT: v_writelane_b32 v23, s30, 0
|
|
; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32
|
|
; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo
|
|
; GFX10_1-NEXT: v_writelane_b32 v23, s31, 1
|
|
; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0
|
|
; GFX10_1-NEXT: ;;#ASMSTART
|
|
; GFX10_1-NEXT: ; use alloca0 v0
|
|
; GFX10_1-NEXT: ;;#ASMEND
|
|
; GFX10_1-NEXT: v_writelane_b32 v23, s33, 2
|
|
; GFX10_1-NEXT: v_writelane_b32 v23, s34, 3
|
|
; GFX10_1-NEXT: v_writelane_b32 v23, s35, 4
|
|
; GFX10_1-NEXT: v_writelane_b32 v23, s36, 5
|
|
; GFX10_1-NEXT: v_writelane_b32 v23, s37, 6
|
|
; GFX10_1-NEXT: v_writelane_b32 v23, s38, 7
|
|
; GFX10_1-NEXT: v_writelane_b32 v23, s39, 8
|
|
; GFX10_1-NEXT: v_writelane_b32 v23, s48, 9
|
|
; GFX10_1-NEXT: v_writelane_b32 v23, s49, 10
|
|
; GFX10_1-NEXT: v_writelane_b32 v23, s50, 11
|
|
; GFX10_1-NEXT: v_writelane_b32 v23, s51, 12
|
|
; GFX10_1-NEXT: v_writelane_b32 v23, s52, 13
|
|
; GFX10_1-NEXT: v_writelane_b32 v23, s53, 14
|
|
; GFX10_1-NEXT: v_writelane_b32 v23, s54, 15
|
|
; GFX10_1-NEXT: v_writelane_b32 v23, s55, 16
|
|
; GFX10_1-NEXT: ;;#ASMSTART
|
|
; GFX10_1-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
|
|
; GFX10_1-NEXT: ;;#ASMEND
|
|
; GFX10_1-NEXT: v_lshrrev_b32_e64 v24, 5, s32
|
|
; GFX10_1-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
|
|
; GFX10_1-NEXT: v_add_nc_u32_e32 v24, 0x4040, v24
|
|
; GFX10_1-NEXT: v_readfirstlane_b32 s54, v24
|
|
; GFX10_1-NEXT: ;;#ASMSTART
|
|
; GFX10_1-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc
|
|
; GFX10_1-NEXT: ;;#ASMEND
|
|
; GFX10_1-NEXT: v_readlane_b32 s55, v23, 16
|
|
; GFX10_1-NEXT: v_readlane_b32 s54, v23, 15
|
|
; GFX10_1-NEXT: v_readlane_b32 s53, v23, 14
|
|
; GFX10_1-NEXT: v_readlane_b32 s52, v23, 13
|
|
; GFX10_1-NEXT: v_readlane_b32 s51, v23, 12
|
|
; GFX10_1-NEXT: v_readlane_b32 s50, v23, 11
|
|
; GFX10_1-NEXT: v_readlane_b32 s49, v23, 10
|
|
; GFX10_1-NEXT: v_readlane_b32 s48, v23, 9
|
|
; GFX10_1-NEXT: v_readlane_b32 s39, v23, 8
|
|
; GFX10_1-NEXT: v_readlane_b32 s38, v23, 7
|
|
; GFX10_1-NEXT: v_readlane_b32 s37, v23, 6
|
|
; GFX10_1-NEXT: v_readlane_b32 s36, v23, 5
|
|
; GFX10_1-NEXT: v_readlane_b32 s35, v23, 4
|
|
; GFX10_1-NEXT: v_readlane_b32 s34, v23, 3
|
|
; GFX10_1-NEXT: v_readlane_b32 s33, v23, 2
|
|
; GFX10_1-NEXT: v_readlane_b32 s31, v23, 1
|
|
; GFX10_1-NEXT: v_readlane_b32 s30, v23, 0
|
|
; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1
|
|
; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80880
|
|
; GFX10_1-NEXT: buffer_load_dword v23, off, s[0:3], s5 ; 4-byte Folded Reload
|
|
; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
|
|
; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
|
|
; GFX10_1-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10_1-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10_3-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs:
|
|
; GFX10_3: ; %bb.0:
|
|
; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1
|
|
; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80880
|
|
; GFX10_3-NEXT: buffer_store_dword v23, off, s[0:3], s5 ; 4-byte Folded Spill
|
|
; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
|
|
; GFX10_3-NEXT: v_writelane_b32 v23, s30, 0
|
|
; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32
|
|
; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo
|
|
; GFX10_3-NEXT: v_writelane_b32 v23, s31, 1
|
|
; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0
|
|
; GFX10_3-NEXT: ;;#ASMSTART
|
|
; GFX10_3-NEXT: ; use alloca0 v0
|
|
; GFX10_3-NEXT: ;;#ASMEND
|
|
; GFX10_3-NEXT: v_writelane_b32 v23, s33, 2
|
|
; GFX10_3-NEXT: v_writelane_b32 v23, s34, 3
|
|
; GFX10_3-NEXT: v_writelane_b32 v23, s35, 4
|
|
; GFX10_3-NEXT: v_writelane_b32 v23, s36, 5
|
|
; GFX10_3-NEXT: v_writelane_b32 v23, s37, 6
|
|
; GFX10_3-NEXT: v_writelane_b32 v23, s38, 7
|
|
; GFX10_3-NEXT: v_writelane_b32 v23, s39, 8
|
|
; GFX10_3-NEXT: v_writelane_b32 v23, s48, 9
|
|
; GFX10_3-NEXT: v_writelane_b32 v23, s49, 10
|
|
; GFX10_3-NEXT: v_writelane_b32 v23, s50, 11
|
|
; GFX10_3-NEXT: v_writelane_b32 v23, s51, 12
|
|
; GFX10_3-NEXT: v_writelane_b32 v23, s52, 13
|
|
; GFX10_3-NEXT: v_writelane_b32 v23, s53, 14
|
|
; GFX10_3-NEXT: v_writelane_b32 v23, s54, 15
|
|
; GFX10_3-NEXT: v_writelane_b32 v23, s55, 16
|
|
; GFX10_3-NEXT: ;;#ASMSTART
|
|
; GFX10_3-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
|
|
; GFX10_3-NEXT: ;;#ASMEND
|
|
; GFX10_3-NEXT: v_lshrrev_b32_e64 v24, 5, s32
|
|
; GFX10_3-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
|
|
; GFX10_3-NEXT: v_add_nc_u32_e32 v24, 0x4040, v24
|
|
; GFX10_3-NEXT: v_readfirstlane_b32 s54, v24
|
|
; GFX10_3-NEXT: ;;#ASMSTART
|
|
; GFX10_3-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc
|
|
; GFX10_3-NEXT: ;;#ASMEND
|
|
; GFX10_3-NEXT: v_readlane_b32 s55, v23, 16
|
|
; GFX10_3-NEXT: v_readlane_b32 s54, v23, 15
|
|
; GFX10_3-NEXT: v_readlane_b32 s53, v23, 14
|
|
; GFX10_3-NEXT: v_readlane_b32 s52, v23, 13
|
|
; GFX10_3-NEXT: v_readlane_b32 s51, v23, 12
|
|
; GFX10_3-NEXT: v_readlane_b32 s50, v23, 11
|
|
; GFX10_3-NEXT: v_readlane_b32 s49, v23, 10
|
|
; GFX10_3-NEXT: v_readlane_b32 s48, v23, 9
|
|
; GFX10_3-NEXT: v_readlane_b32 s39, v23, 8
|
|
; GFX10_3-NEXT: v_readlane_b32 s38, v23, 7
|
|
; GFX10_3-NEXT: v_readlane_b32 s37, v23, 6
|
|
; GFX10_3-NEXT: v_readlane_b32 s36, v23, 5
|
|
; GFX10_3-NEXT: v_readlane_b32 s35, v23, 4
|
|
; GFX10_3-NEXT: v_readlane_b32 s34, v23, 3
|
|
; GFX10_3-NEXT: v_readlane_b32 s33, v23, 2
|
|
; GFX10_3-NEXT: v_readlane_b32 s31, v23, 1
|
|
; GFX10_3-NEXT: v_readlane_b32 s30, v23, 0
|
|
; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1
|
|
; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80880
|
|
; GFX10_3-NEXT: buffer_load_dword v23, off, s[0:3], s5 ; 4-byte Folded Reload
|
|
; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
|
|
; GFX10_3-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10_3-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
|
|
; GFX11-NEXT: s_add_i32 s1, s32, 0x4044
|
|
; GFX11-NEXT: scratch_store_b32 off, v23, s1 ; 4-byte Folded Spill
|
|
; GFX11-NEXT: s_mov_b32 exec_lo, s0
|
|
; GFX11-NEXT: v_writelane_b32 v23, s30, 0
|
|
; GFX11-NEXT: s_add_i32 s0, s32, 64
|
|
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
|
; GFX11-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-NEXT: s_and_b32 s0, 0, exec_lo
|
|
; GFX11-NEXT: v_writelane_b32 v23, s31, 1
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use alloca0 v0
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: v_writelane_b32 v23, s33, 2
|
|
; GFX11-NEXT: v_writelane_b32 v23, s34, 3
|
|
; GFX11-NEXT: v_writelane_b32 v23, s35, 4
|
|
; GFX11-NEXT: v_writelane_b32 v23, s36, 5
|
|
; GFX11-NEXT: v_writelane_b32 v23, s37, 6
|
|
; GFX11-NEXT: v_writelane_b32 v23, s38, 7
|
|
; GFX11-NEXT: v_writelane_b32 v23, s39, 8
|
|
; GFX11-NEXT: v_writelane_b32 v23, s48, 9
|
|
; GFX11-NEXT: v_writelane_b32 v23, s49, 10
|
|
; GFX11-NEXT: v_writelane_b32 v23, s50, 11
|
|
; GFX11-NEXT: v_writelane_b32 v23, s51, 12
|
|
; GFX11-NEXT: v_writelane_b32 v23, s52, 13
|
|
; GFX11-NEXT: v_writelane_b32 v23, s53, 14
|
|
; GFX11-NEXT: v_writelane_b32 v23, s54, 15
|
|
; GFX11-NEXT: v_writelane_b32 v23, s55, 16
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: s_addc_u32 s59, s32, 0x4040
|
|
; GFX11-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
|
|
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
|
|
; GFX11-NEXT: s_bitcmp1_b32 s59, 0
|
|
; GFX11-NEXT: s_bitset0_b32 s59, 0
|
|
; GFX11-NEXT: s_mov_b32 s54, s59
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: v_readlane_b32 s55, v23, 16
|
|
; GFX11-NEXT: v_readlane_b32 s54, v23, 15
|
|
; GFX11-NEXT: v_readlane_b32 s53, v23, 14
|
|
; GFX11-NEXT: v_readlane_b32 s52, v23, 13
|
|
; GFX11-NEXT: v_readlane_b32 s51, v23, 12
|
|
; GFX11-NEXT: v_readlane_b32 s50, v23, 11
|
|
; GFX11-NEXT: v_readlane_b32 s49, v23, 10
|
|
; GFX11-NEXT: v_readlane_b32 s48, v23, 9
|
|
; GFX11-NEXT: v_readlane_b32 s39, v23, 8
|
|
; GFX11-NEXT: v_readlane_b32 s38, v23, 7
|
|
; GFX11-NEXT: v_readlane_b32 s37, v23, 6
|
|
; GFX11-NEXT: v_readlane_b32 s36, v23, 5
|
|
; GFX11-NEXT: v_readlane_b32 s35, v23, 4
|
|
; GFX11-NEXT: v_readlane_b32 s34, v23, 3
|
|
; GFX11-NEXT: v_readlane_b32 s33, v23, 2
|
|
; GFX11-NEXT: v_readlane_b32 s31, v23, 1
|
|
; GFX11-NEXT: v_readlane_b32 s30, v23, 0
|
|
; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
|
|
; GFX11-NEXT: s_add_i32 s1, s32, 0x4044
|
|
; GFX11-NEXT: scratch_load_b32 v23, off, s1 ; 4-byte Folded Reload
|
|
; GFX11-NEXT: s_mov_b32 exec_lo, s0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX12-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs:
|
|
; GFX12: ; %bb.0:
|
|
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX12-NEXT: s_wait_expcnt 0x0
|
|
; GFX12-NEXT: s_wait_samplecnt 0x0
|
|
; GFX12-NEXT: s_wait_bvhcnt 0x0
|
|
; GFX12-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-NEXT: s_xor_saveexec_b32 s0, -1
|
|
; GFX12-NEXT: scratch_store_b32 off, v23, s32 offset:16388 ; 4-byte Folded Spill
|
|
; GFX12-NEXT: s_wait_alu 0xfffe
|
|
; GFX12-NEXT: s_mov_b32 exec_lo, s0
|
|
; GFX12-NEXT: v_writelane_b32 v23, s30, 0
|
|
; GFX12-NEXT: v_mov_b32_e32 v0, s32
|
|
; GFX12-NEXT: s_and_b32 s0, 0, exec_lo
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use alloca0 v0
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: v_writelane_b32 v23, s31, 1
|
|
; GFX12-NEXT: v_writelane_b32 v23, s33, 2
|
|
; GFX12-NEXT: v_writelane_b32 v23, s34, 3
|
|
; GFX12-NEXT: v_writelane_b32 v23, s35, 4
|
|
; GFX12-NEXT: v_writelane_b32 v23, s36, 5
|
|
; GFX12-NEXT: v_writelane_b32 v23, s37, 6
|
|
; GFX12-NEXT: v_writelane_b32 v23, s38, 7
|
|
; GFX12-NEXT: v_writelane_b32 v23, s39, 8
|
|
; GFX12-NEXT: v_writelane_b32 v23, s48, 9
|
|
; GFX12-NEXT: v_writelane_b32 v23, s49, 10
|
|
; GFX12-NEXT: v_writelane_b32 v23, s50, 11
|
|
; GFX12-NEXT: v_writelane_b32 v23, s51, 12
|
|
; GFX12-NEXT: v_writelane_b32 v23, s52, 13
|
|
; GFX12-NEXT: v_writelane_b32 v23, s53, 14
|
|
; GFX12-NEXT: v_writelane_b32 v23, s54, 15
|
|
; GFX12-NEXT: v_writelane_b32 v23, s55, 16
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: s_add_co_ci_u32 s59, s32, 0x4000
|
|
; GFX12-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
|
|
; GFX12-NEXT: s_wait_alu 0xfffe
|
|
; GFX12-NEXT: s_bitcmp1_b32 s59, 0
|
|
; GFX12-NEXT: s_bitset0_b32 s59, 0
|
|
; GFX12-NEXT: s_wait_alu 0xfffe
|
|
; GFX12-NEXT: s_mov_b32 s54, s59
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: v_readlane_b32 s55, v23, 16
|
|
; GFX12-NEXT: v_readlane_b32 s54, v23, 15
|
|
; GFX12-NEXT: v_readlane_b32 s53, v23, 14
|
|
; GFX12-NEXT: v_readlane_b32 s52, v23, 13
|
|
; GFX12-NEXT: v_readlane_b32 s51, v23, 12
|
|
; GFX12-NEXT: v_readlane_b32 s50, v23, 11
|
|
; GFX12-NEXT: v_readlane_b32 s49, v23, 10
|
|
; GFX12-NEXT: v_readlane_b32 s48, v23, 9
|
|
; GFX12-NEXT: v_readlane_b32 s39, v23, 8
|
|
; GFX12-NEXT: v_readlane_b32 s38, v23, 7
|
|
; GFX12-NEXT: v_readlane_b32 s37, v23, 6
|
|
; GFX12-NEXT: v_readlane_b32 s36, v23, 5
|
|
; GFX12-NEXT: v_readlane_b32 s35, v23, 4
|
|
; GFX12-NEXT: v_readlane_b32 s34, v23, 3
|
|
; GFX12-NEXT: v_readlane_b32 s33, v23, 2
|
|
; GFX12-NEXT: v_readlane_b32 s31, v23, 1
|
|
; GFX12-NEXT: v_readlane_b32 s30, v23, 0
|
|
; GFX12-NEXT: s_xor_saveexec_b32 s0, -1
|
|
; GFX12-NEXT: scratch_load_b32 v23, off, s32 offset:16388 ; 4-byte Folded Reload
|
|
; GFX12-NEXT: s_wait_alu 0xfffe
|
|
; GFX12-NEXT: s_mov_b32 exec_lo, s0
|
|
; GFX12-NEXT: s_wait_loadcnt 0x0
|
|
; GFX12-NEXT: s_setpc_b64 s[30:31]
|
|
; %alloca0 is a large (4096 x i32), 64-byte-aligned stack object. In the
; generated code above, %alloca1 is addressed at a large offset from s32
; (e.g. 0x4040 in the gfx900 output).
%alloca0 = alloca [4096 x i32], align 64, addrspace(5)
|
|
; %alloca1 is the frame index under test: it is passed to the final asm in
; s54 (operand $9), so its address has to end up in an SGPR.
%alloca1 = alloca i32, align 4, addrspace(5)
|
|
; Consume the address of %alloca0 from a VGPR ("v" constraint).
call void asm sideeffect "; use alloca0 $0", "v"(ptr addrspace(5) %alloca0)
|
|
|
|
; Force no SGPRs to be available for the carry-out of the vector add.
; The asm below defines s0-s58, v0-v22 and vcc; every one of those results is
; fed back into the final asm, so they are all live across the frame index
; materialization.
|
|
%asm = call %asm.output asm sideeffect
|
|
"; def $0, $1, $2, $3, $4, $5, $6, $7, $8",
|
|
"={s[0:15]},={s[16:31]},={s[32:47]},={s[48:55]},={s[56:57]},={s58},={v[0:15]},={v[16:22]},={vcc}"()
|
|
|
|
; Unpack the asm results: fields 0-5 are the SGPR values, 6-7 the VGPR
; values and 8 is vcc (same order as the output constraints above).
%s0 = extractvalue %asm.output %asm, 0
|
|
%s1 = extractvalue %asm.output %asm, 1
|
|
%s2 = extractvalue %asm.output %asm, 2
|
|
%s3 = extractvalue %asm.output %asm, 3
|
|
%s4 = extractvalue %asm.output %asm, 4
|
|
%s5 = extractvalue %asm.output %asm, 5
|
|
|
|
%v0 = extractvalue %asm.output %asm, 6
|
|
%v1 = extractvalue %asm.output %asm, 7
|
|
|
|
%vcc = extractvalue %asm.output %asm, 8
|
|
|
|
; scc is unavailable since it is live into the asm below: the trailing {scc}
; constraint binds the constant 0 to scc. %alloca1 is bound to {s54}.
|
|
call void asm sideeffect "; use $0, $1, $2, $3, $4, $5, $6, $7, $8, $9, $10",
|
|
"{s[0:15]},{s[16:31]},{s[32:47]},{s[48:55]},{s[56:57]},{s58},{v[0:15]},{v[16:22]},{vcc},{s54},{scc}"(
|
|
<16 x i32> %s0,
|
|
<16 x i32> %s1,
|
|
<16 x i32> %s2,
|
|
<8 x i32> %s3,
|
|
<2 x i32> %s4,
|
|
i32 %s5,
|
|
<16 x i32> %v0,
|
|
<7 x i32> %v1,
|
|
i64 %vcc,
|
|
ptr addrspace(5) %alloca1,
|
|
i32 0) ; use of scc
|
|
|
|
ret void
|
|
}
|
|
|
|
; FIXME: This was meant to test an FI at offset 0, but other objects get
|
|
; assigned there. Instead this shows a nonzero offset that is still an
|
|
; inline immediate and can fold directly into the address computation.
|
|
define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset() #1 {
|
|
; GFX7-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset:
|
|
; GFX7: ; %bb.0:
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1
|
|
; GFX7-NEXT: s_add_i32 s6, s32, 0x100400
|
|
; GFX7-NEXT: buffer_store_dword v21, off, s[0:3], s6 ; 4-byte Folded Spill
|
|
; GFX7-NEXT: s_mov_b64 exec, s[4:5]
|
|
; GFX7-NEXT: v_writelane_b32 v21, s30, 0
|
|
; GFX7-NEXT: v_writelane_b32 v21, s31, 1
|
|
; GFX7-NEXT: v_writelane_b32 v21, s33, 2
|
|
; GFX7-NEXT: v_writelane_b32 v21, s34, 3
|
|
; GFX7-NEXT: v_writelane_b32 v21, s35, 4
|
|
; GFX7-NEXT: v_writelane_b32 v21, s36, 5
|
|
; GFX7-NEXT: v_writelane_b32 v21, s37, 6
|
|
; GFX7-NEXT: v_writelane_b32 v21, s38, 7
|
|
; GFX7-NEXT: v_writelane_b32 v21, s39, 8
|
|
; GFX7-NEXT: v_writelane_b32 v21, s48, 9
|
|
; GFX7-NEXT: v_writelane_b32 v21, s49, 10
|
|
; GFX7-NEXT: v_writelane_b32 v21, s50, 11
|
|
; GFX7-NEXT: v_writelane_b32 v21, s51, 12
|
|
; GFX7-NEXT: v_writelane_b32 v21, s52, 13
|
|
; GFX7-NEXT: v_writelane_b32 v21, s53, 14
|
|
; GFX7-NEXT: v_writelane_b32 v21, s54, 15
|
|
; GFX7-NEXT: s_and_b64 s[4:5], 0, exec
|
|
; GFX7-NEXT: v_writelane_b32 v21, s55, 16
|
|
; GFX7-NEXT: ;;#ASMSTART
|
|
; GFX7-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
|
|
; GFX7-NEXT: ;;#ASMEND
|
|
; GFX7-NEXT: v_mad_u32_u24 v22, 16, 64, s32
|
|
; GFX7-NEXT: v_lshrrev_b32_e32 v22, 6, v22
|
|
; GFX7-NEXT: v_readfirstlane_b32 s54, v22
|
|
; GFX7-NEXT: ;;#ASMSTART
|
|
; GFX7-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc
|
|
; GFX7-NEXT: ;;#ASMEND
|
|
; GFX7-NEXT: v_readlane_b32 s55, v21, 16
|
|
; GFX7-NEXT: v_readlane_b32 s54, v21, 15
|
|
; GFX7-NEXT: v_readlane_b32 s53, v21, 14
|
|
; GFX7-NEXT: v_readlane_b32 s52, v21, 13
|
|
; GFX7-NEXT: v_readlane_b32 s51, v21, 12
|
|
; GFX7-NEXT: v_readlane_b32 s50, v21, 11
|
|
; GFX7-NEXT: v_readlane_b32 s49, v21, 10
|
|
; GFX7-NEXT: v_readlane_b32 s48, v21, 9
|
|
; GFX7-NEXT: v_readlane_b32 s39, v21, 8
|
|
; GFX7-NEXT: v_readlane_b32 s38, v21, 7
|
|
; GFX7-NEXT: v_readlane_b32 s37, v21, 6
|
|
; GFX7-NEXT: v_readlane_b32 s36, v21, 5
|
|
; GFX7-NEXT: v_readlane_b32 s35, v21, 4
|
|
; GFX7-NEXT: v_readlane_b32 s34, v21, 3
|
|
; GFX7-NEXT: v_readlane_b32 s33, v21, 2
|
|
; GFX7-NEXT: v_readlane_b32 s31, v21, 1
|
|
; GFX7-NEXT: v_readlane_b32 s30, v21, 0
|
|
; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1
|
|
; GFX7-NEXT: s_add_i32 s6, s32, 0x100400
|
|
; GFX7-NEXT: buffer_load_dword v21, off, s[0:3], s6 ; 4-byte Folded Reload
|
|
; GFX7-NEXT: s_mov_b64 exec, s[4:5]
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
|
|
; GFX8-NEXT: s_add_i32 s6, s32, 0x100400
|
|
; GFX8-NEXT: buffer_store_dword v21, off, s[0:3], s6 ; 4-byte Folded Spill
|
|
; GFX8-NEXT: s_mov_b64 exec, s[4:5]
|
|
; GFX8-NEXT: v_writelane_b32 v21, s30, 0
|
|
; GFX8-NEXT: v_writelane_b32 v21, s31, 1
|
|
; GFX8-NEXT: v_writelane_b32 v21, s33, 2
|
|
; GFX8-NEXT: v_writelane_b32 v21, s34, 3
|
|
; GFX8-NEXT: v_writelane_b32 v21, s35, 4
|
|
; GFX8-NEXT: v_writelane_b32 v21, s36, 5
|
|
; GFX8-NEXT: v_writelane_b32 v21, s37, 6
|
|
; GFX8-NEXT: v_writelane_b32 v21, s38, 7
|
|
; GFX8-NEXT: v_writelane_b32 v21, s39, 8
|
|
; GFX8-NEXT: v_writelane_b32 v21, s48, 9
|
|
; GFX8-NEXT: v_writelane_b32 v21, s49, 10
|
|
; GFX8-NEXT: v_writelane_b32 v21, s50, 11
|
|
; GFX8-NEXT: v_writelane_b32 v21, s51, 12
|
|
; GFX8-NEXT: v_writelane_b32 v21, s52, 13
|
|
; GFX8-NEXT: v_writelane_b32 v21, s53, 14
|
|
; GFX8-NEXT: v_writelane_b32 v21, s54, 15
|
|
; GFX8-NEXT: s_and_b64 s[4:5], 0, exec
|
|
; GFX8-NEXT: v_writelane_b32 v21, s55, 16
|
|
; GFX8-NEXT: ;;#ASMSTART
|
|
; GFX8-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
|
|
; GFX8-NEXT: ;;#ASMEND
|
|
; GFX8-NEXT: v_mad_u32_u24 v22, 16, 64, s32
|
|
; GFX8-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
|
|
; GFX8-NEXT: v_lshrrev_b32_e32 v22, 6, v22
|
|
; GFX8-NEXT: v_readfirstlane_b32 s54, v22
|
|
; GFX8-NEXT: ;;#ASMSTART
|
|
; GFX8-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc
|
|
; GFX8-NEXT: ;;#ASMEND
|
|
; GFX8-NEXT: v_readlane_b32 s55, v21, 16
|
|
; GFX8-NEXT: v_readlane_b32 s54, v21, 15
|
|
; GFX8-NEXT: v_readlane_b32 s53, v21, 14
|
|
; GFX8-NEXT: v_readlane_b32 s52, v21, 13
|
|
; GFX8-NEXT: v_readlane_b32 s51, v21, 12
|
|
; GFX8-NEXT: v_readlane_b32 s50, v21, 11
|
|
; GFX8-NEXT: v_readlane_b32 s49, v21, 10
|
|
; GFX8-NEXT: v_readlane_b32 s48, v21, 9
|
|
; GFX8-NEXT: v_readlane_b32 s39, v21, 8
|
|
; GFX8-NEXT: v_readlane_b32 s38, v21, 7
|
|
; GFX8-NEXT: v_readlane_b32 s37, v21, 6
|
|
; GFX8-NEXT: v_readlane_b32 s36, v21, 5
|
|
; GFX8-NEXT: v_readlane_b32 s35, v21, 4
|
|
; GFX8-NEXT: v_readlane_b32 s34, v21, 3
|
|
; GFX8-NEXT: v_readlane_b32 s33, v21, 2
|
|
; GFX8-NEXT: v_readlane_b32 s31, v21, 1
|
|
; GFX8-NEXT: v_readlane_b32 s30, v21, 0
|
|
; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
|
|
; GFX8-NEXT: s_add_i32 s6, s32, 0x100400
|
|
; GFX8-NEXT: buffer_load_dword v21, off, s[0:3], s6 ; 4-byte Folded Reload
|
|
; GFX8-NEXT: s_mov_b64 exec, s[4:5]
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX900-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset:
|
|
; GFX900: ; %bb.0:
|
|
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
|
|
; GFX900-NEXT: s_add_i32 s6, s32, 0x100400
|
|
; GFX900-NEXT: buffer_store_dword v21, off, s[0:3], s6 ; 4-byte Folded Spill
|
|
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
|
|
; GFX900-NEXT: v_writelane_b32 v21, s30, 0
|
|
; GFX900-NEXT: v_writelane_b32 v21, s31, 1
|
|
; GFX900-NEXT: v_writelane_b32 v21, s33, 2
|
|
; GFX900-NEXT: v_writelane_b32 v21, s34, 3
|
|
; GFX900-NEXT: v_writelane_b32 v21, s35, 4
|
|
; GFX900-NEXT: v_writelane_b32 v21, s36, 5
|
|
; GFX900-NEXT: v_writelane_b32 v21, s37, 6
|
|
; GFX900-NEXT: v_writelane_b32 v21, s38, 7
|
|
; GFX900-NEXT: v_writelane_b32 v21, s39, 8
|
|
; GFX900-NEXT: v_writelane_b32 v21, s48, 9
|
|
; GFX900-NEXT: v_writelane_b32 v21, s49, 10
|
|
; GFX900-NEXT: v_writelane_b32 v21, s50, 11
|
|
; GFX900-NEXT: v_writelane_b32 v21, s51, 12
|
|
; GFX900-NEXT: v_writelane_b32 v21, s52, 13
|
|
; GFX900-NEXT: v_writelane_b32 v21, s53, 14
|
|
; GFX900-NEXT: v_writelane_b32 v21, s54, 15
|
|
; GFX900-NEXT: s_and_b64 s[4:5], 0, exec
|
|
; GFX900-NEXT: v_writelane_b32 v21, s55, 16
|
|
; GFX900-NEXT: ;;#ASMSTART
|
|
; GFX900-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
|
|
; GFX900-NEXT: ;;#ASMEND
|
|
; GFX900-NEXT: v_lshrrev_b32_e64 v22, 6, s32
|
|
; GFX900-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
|
|
; GFX900-NEXT: v_add_u32_e32 v22, 16, v22
|
|
; GFX900-NEXT: v_readfirstlane_b32 s54, v22
|
|
; GFX900-NEXT: ;;#ASMSTART
|
|
; GFX900-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc
|
|
; GFX900-NEXT: ;;#ASMEND
|
|
; GFX900-NEXT: v_readlane_b32 s55, v21, 16
|
|
; GFX900-NEXT: v_readlane_b32 s54, v21, 15
|
|
; GFX900-NEXT: v_readlane_b32 s53, v21, 14
|
|
; GFX900-NEXT: v_readlane_b32 s52, v21, 13
|
|
; GFX900-NEXT: v_readlane_b32 s51, v21, 12
|
|
; GFX900-NEXT: v_readlane_b32 s50, v21, 11
|
|
; GFX900-NEXT: v_readlane_b32 s49, v21, 10
|
|
; GFX900-NEXT: v_readlane_b32 s48, v21, 9
|
|
; GFX900-NEXT: v_readlane_b32 s39, v21, 8
|
|
; GFX900-NEXT: v_readlane_b32 s38, v21, 7
|
|
; GFX900-NEXT: v_readlane_b32 s37, v21, 6
|
|
; GFX900-NEXT: v_readlane_b32 s36, v21, 5
|
|
; GFX900-NEXT: v_readlane_b32 s35, v21, 4
|
|
; GFX900-NEXT: v_readlane_b32 s34, v21, 3
|
|
; GFX900-NEXT: v_readlane_b32 s33, v21, 2
|
|
; GFX900-NEXT: v_readlane_b32 s31, v21, 1
|
|
; GFX900-NEXT: v_readlane_b32 s30, v21, 0
|
|
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
|
|
; GFX900-NEXT: s_add_i32 s6, s32, 0x100400
|
|
; GFX900-NEXT: buffer_load_dword v21, off, s[0:3], s6 ; 4-byte Folded Reload
|
|
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
|
|
; GFX900-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX900-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX942-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset:
|
|
; GFX942: ; %bb.0:
|
|
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
|
|
; GFX942-NEXT: s_add_i32 s2, s32, 0x4010
|
|
; GFX942-NEXT: scratch_store_dword off, v21, s2 ; 4-byte Folded Spill
|
|
; GFX942-NEXT: s_mov_b64 exec, s[0:1]
|
|
; GFX942-NEXT: v_writelane_b32 v21, s30, 0
|
|
; GFX942-NEXT: v_writelane_b32 v21, s31, 1
|
|
; GFX942-NEXT: v_writelane_b32 v21, s33, 2
|
|
; GFX942-NEXT: v_writelane_b32 v21, s34, 3
|
|
; GFX942-NEXT: v_writelane_b32 v21, s35, 4
|
|
; GFX942-NEXT: v_writelane_b32 v21, s36, 5
|
|
; GFX942-NEXT: v_writelane_b32 v21, s37, 6
|
|
; GFX942-NEXT: v_writelane_b32 v21, s38, 7
|
|
; GFX942-NEXT: v_writelane_b32 v21, s39, 8
|
|
; GFX942-NEXT: v_writelane_b32 v21, s48, 9
|
|
; GFX942-NEXT: v_writelane_b32 v21, s49, 10
|
|
; GFX942-NEXT: v_writelane_b32 v21, s50, 11
|
|
; GFX942-NEXT: v_writelane_b32 v21, s51, 12
|
|
; GFX942-NEXT: v_writelane_b32 v21, s52, 13
|
|
; GFX942-NEXT: v_writelane_b32 v21, s53, 14
|
|
; GFX942-NEXT: v_writelane_b32 v21, s54, 15
|
|
; GFX942-NEXT: s_and_b64 s[60:61], 0, exec
|
|
; GFX942-NEXT: v_writelane_b32 v21, s55, 16
|
|
; GFX942-NEXT: ;;#ASMSTART
|
|
; GFX942-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
|
|
; GFX942-NEXT: ;;#ASMEND
|
|
; GFX942-NEXT: s_addc_u32 s59, s32, 16
|
|
; GFX942-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
|
|
; GFX942-NEXT: s_bitcmp1_b32 s59, 0
|
|
; GFX942-NEXT: s_bitset0_b32 s59, 0
|
|
; GFX942-NEXT: s_mov_b32 s54, s59
|
|
; GFX942-NEXT: ;;#ASMSTART
|
|
; GFX942-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc
|
|
; GFX942-NEXT: ;;#ASMEND
|
|
; GFX942-NEXT: v_readlane_b32 s55, v21, 16
|
|
; GFX942-NEXT: v_readlane_b32 s54, v21, 15
|
|
; GFX942-NEXT: v_readlane_b32 s53, v21, 14
|
|
; GFX942-NEXT: v_readlane_b32 s52, v21, 13
|
|
; GFX942-NEXT: v_readlane_b32 s51, v21, 12
|
|
; GFX942-NEXT: v_readlane_b32 s50, v21, 11
|
|
; GFX942-NEXT: v_readlane_b32 s49, v21, 10
|
|
; GFX942-NEXT: v_readlane_b32 s48, v21, 9
|
|
; GFX942-NEXT: v_readlane_b32 s39, v21, 8
|
|
; GFX942-NEXT: v_readlane_b32 s38, v21, 7
|
|
; GFX942-NEXT: v_readlane_b32 s37, v21, 6
|
|
; GFX942-NEXT: v_readlane_b32 s36, v21, 5
|
|
; GFX942-NEXT: v_readlane_b32 s35, v21, 4
|
|
; GFX942-NEXT: v_readlane_b32 s34, v21, 3
|
|
; GFX942-NEXT: v_readlane_b32 s33, v21, 2
|
|
; GFX942-NEXT: v_readlane_b32 s31, v21, 1
|
|
; GFX942-NEXT: v_readlane_b32 s30, v21, 0
|
|
; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
|
|
; GFX942-NEXT: s_add_i32 s2, s32, 0x4010
|
|
; GFX942-NEXT: scratch_load_dword v21, off, s2 ; 4-byte Folded Reload
|
|
; GFX942-NEXT: s_mov_b64 exec, s[0:1]
|
|
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX942-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10_1-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset:
|
|
; GFX10_1: ; %bb.0:
|
|
; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1
|
|
; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80200
|
|
; GFX10_1-NEXT: buffer_store_dword v21, off, s[0:3], s5 ; 4-byte Folded Spill
|
|
; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
|
|
; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
|
|
; GFX10_1-NEXT: v_writelane_b32 v21, s30, 0
|
|
; GFX10_1-NEXT: s_and_b32 s59, 0, exec_lo
|
|
; GFX10_1-NEXT: v_writelane_b32 v21, s31, 1
|
|
; GFX10_1-NEXT: v_writelane_b32 v21, s33, 2
|
|
; GFX10_1-NEXT: v_writelane_b32 v21, s34, 3
|
|
; GFX10_1-NEXT: v_writelane_b32 v21, s35, 4
|
|
; GFX10_1-NEXT: v_writelane_b32 v21, s36, 5
|
|
; GFX10_1-NEXT: v_writelane_b32 v21, s37, 6
|
|
; GFX10_1-NEXT: v_writelane_b32 v21, s38, 7
|
|
; GFX10_1-NEXT: v_writelane_b32 v21, s39, 8
|
|
; GFX10_1-NEXT: v_writelane_b32 v21, s48, 9
|
|
; GFX10_1-NEXT: v_writelane_b32 v21, s49, 10
|
|
; GFX10_1-NEXT: v_writelane_b32 v21, s50, 11
|
|
; GFX10_1-NEXT: v_writelane_b32 v21, s51, 12
|
|
; GFX10_1-NEXT: v_writelane_b32 v21, s52, 13
|
|
; GFX10_1-NEXT: v_writelane_b32 v21, s53, 14
|
|
; GFX10_1-NEXT: v_writelane_b32 v21, s54, 15
|
|
; GFX10_1-NEXT: v_writelane_b32 v21, s55, 16
|
|
; GFX10_1-NEXT: ;;#ASMSTART
|
|
; GFX10_1-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
|
|
; GFX10_1-NEXT: ;;#ASMEND
|
|
; GFX10_1-NEXT: v_lshrrev_b32_e64 v22, 5, s32
|
|
; GFX10_1-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
|
|
; GFX10_1-NEXT: v_add_nc_u32_e32 v22, 16, v22
|
|
; GFX10_1-NEXT: v_readfirstlane_b32 s54, v22
|
|
; GFX10_1-NEXT: ;;#ASMSTART
|
|
; GFX10_1-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc
|
|
; GFX10_1-NEXT: ;;#ASMEND
|
|
; GFX10_1-NEXT: v_readlane_b32 s55, v21, 16
|
|
; GFX10_1-NEXT: v_readlane_b32 s54, v21, 15
|
|
; GFX10_1-NEXT: v_readlane_b32 s53, v21, 14
|
|
; GFX10_1-NEXT: v_readlane_b32 s52, v21, 13
|
|
; GFX10_1-NEXT: v_readlane_b32 s51, v21, 12
|
|
; GFX10_1-NEXT: v_readlane_b32 s50, v21, 11
|
|
; GFX10_1-NEXT: v_readlane_b32 s49, v21, 10
|
|
; GFX10_1-NEXT: v_readlane_b32 s48, v21, 9
|
|
; GFX10_1-NEXT: v_readlane_b32 s39, v21, 8
|
|
; GFX10_1-NEXT: v_readlane_b32 s38, v21, 7
|
|
; GFX10_1-NEXT: v_readlane_b32 s37, v21, 6
|
|
; GFX10_1-NEXT: v_readlane_b32 s36, v21, 5
|
|
; GFX10_1-NEXT: v_readlane_b32 s35, v21, 4
|
|
; GFX10_1-NEXT: v_readlane_b32 s34, v21, 3
|
|
; GFX10_1-NEXT: v_readlane_b32 s33, v21, 2
|
|
; GFX10_1-NEXT: v_readlane_b32 s31, v21, 1
|
|
; GFX10_1-NEXT: v_readlane_b32 s30, v21, 0
|
|
; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1
|
|
; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80200
|
|
; GFX10_1-NEXT: buffer_load_dword v21, off, s[0:3], s5 ; 4-byte Folded Reload
|
|
; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
|
|
; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
|
|
; GFX10_1-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10_1-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10_3-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset:
|
|
; GFX10_3: ; %bb.0:
|
|
; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1
|
|
; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80200
|
|
; GFX10_3-NEXT: buffer_store_dword v21, off, s[0:3], s5 ; 4-byte Folded Spill
|
|
; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
|
|
; GFX10_3-NEXT: v_writelane_b32 v21, s30, 0
|
|
; GFX10_3-NEXT: s_and_b32 s59, 0, exec_lo
|
|
; GFX10_3-NEXT: v_writelane_b32 v21, s31, 1
|
|
; GFX10_3-NEXT: v_writelane_b32 v21, s33, 2
|
|
; GFX10_3-NEXT: v_writelane_b32 v21, s34, 3
|
|
; GFX10_3-NEXT: v_writelane_b32 v21, s35, 4
|
|
; GFX10_3-NEXT: v_writelane_b32 v21, s36, 5
|
|
; GFX10_3-NEXT: v_writelane_b32 v21, s37, 6
|
|
; GFX10_3-NEXT: v_writelane_b32 v21, s38, 7
|
|
; GFX10_3-NEXT: v_writelane_b32 v21, s39, 8
|
|
; GFX10_3-NEXT: v_writelane_b32 v21, s48, 9
|
|
; GFX10_3-NEXT: v_writelane_b32 v21, s49, 10
|
|
; GFX10_3-NEXT: v_writelane_b32 v21, s50, 11
|
|
; GFX10_3-NEXT: v_writelane_b32 v21, s51, 12
|
|
; GFX10_3-NEXT: v_writelane_b32 v21, s52, 13
|
|
; GFX10_3-NEXT: v_writelane_b32 v21, s53, 14
|
|
; GFX10_3-NEXT: v_writelane_b32 v21, s54, 15
|
|
; GFX10_3-NEXT: v_writelane_b32 v21, s55, 16
|
|
; GFX10_3-NEXT: ;;#ASMSTART
|
|
; GFX10_3-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
|
|
; GFX10_3-NEXT: ;;#ASMEND
|
|
; GFX10_3-NEXT: v_lshrrev_b32_e64 v22, 5, s32
|
|
; GFX10_3-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
|
|
; GFX10_3-NEXT: v_add_nc_u32_e32 v22, 16, v22
|
|
; GFX10_3-NEXT: v_readfirstlane_b32 s54, v22
|
|
; GFX10_3-NEXT: ;;#ASMSTART
|
|
; GFX10_3-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc
|
|
; GFX10_3-NEXT: ;;#ASMEND
|
|
; GFX10_3-NEXT: v_readlane_b32 s55, v21, 16
|
|
; GFX10_3-NEXT: v_readlane_b32 s54, v21, 15
|
|
; GFX10_3-NEXT: v_readlane_b32 s53, v21, 14
|
|
; GFX10_3-NEXT: v_readlane_b32 s52, v21, 13
|
|
; GFX10_3-NEXT: v_readlane_b32 s51, v21, 12
|
|
; GFX10_3-NEXT: v_readlane_b32 s50, v21, 11
|
|
; GFX10_3-NEXT: v_readlane_b32 s49, v21, 10
|
|
; GFX10_3-NEXT: v_readlane_b32 s48, v21, 9
|
|
; GFX10_3-NEXT: v_readlane_b32 s39, v21, 8
|
|
; GFX10_3-NEXT: v_readlane_b32 s38, v21, 7
|
|
; GFX10_3-NEXT: v_readlane_b32 s37, v21, 6
|
|
; GFX10_3-NEXT: v_readlane_b32 s36, v21, 5
|
|
; GFX10_3-NEXT: v_readlane_b32 s35, v21, 4
|
|
; GFX10_3-NEXT: v_readlane_b32 s34, v21, 3
|
|
; GFX10_3-NEXT: v_readlane_b32 s33, v21, 2
|
|
; GFX10_3-NEXT: v_readlane_b32 s31, v21, 1
|
|
; GFX10_3-NEXT: v_readlane_b32 s30, v21, 0
|
|
; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1
|
|
; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80200
|
|
; GFX10_3-NEXT: buffer_load_dword v21, off, s[0:3], s5 ; 4-byte Folded Reload
|
|
; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
|
|
; GFX10_3-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10_3-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
|
|
; GFX11-NEXT: s_add_i32 s1, s32, 0x4010
|
|
; GFX11-NEXT: scratch_store_b32 off, v21, s1 ; 4-byte Folded Spill
|
|
; GFX11-NEXT: s_mov_b32 exec_lo, s0
|
|
; GFX11-NEXT: v_writelane_b32 v21, s30, 0
|
|
; GFX11-NEXT: s_and_b32 s59, 0, exec_lo
|
|
; GFX11-NEXT: v_writelane_b32 v21, s31, 1
|
|
; GFX11-NEXT: v_writelane_b32 v21, s33, 2
|
|
; GFX11-NEXT: v_writelane_b32 v21, s34, 3
|
|
; GFX11-NEXT: v_writelane_b32 v21, s35, 4
|
|
; GFX11-NEXT: v_writelane_b32 v21, s36, 5
|
|
; GFX11-NEXT: v_writelane_b32 v21, s37, 6
|
|
; GFX11-NEXT: v_writelane_b32 v21, s38, 7
|
|
; GFX11-NEXT: v_writelane_b32 v21, s39, 8
|
|
; GFX11-NEXT: v_writelane_b32 v21, s48, 9
|
|
; GFX11-NEXT: v_writelane_b32 v21, s49, 10
|
|
; GFX11-NEXT: v_writelane_b32 v21, s50, 11
|
|
; GFX11-NEXT: v_writelane_b32 v21, s51, 12
|
|
; GFX11-NEXT: v_writelane_b32 v21, s52, 13
|
|
; GFX11-NEXT: v_writelane_b32 v21, s53, 14
|
|
; GFX11-NEXT: v_writelane_b32 v21, s54, 15
|
|
; GFX11-NEXT: v_writelane_b32 v21, s55, 16
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: s_addc_u32 s59, s32, 16
|
|
; GFX11-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
|
|
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
|
|
; GFX11-NEXT: s_bitcmp1_b32 s59, 0
|
|
; GFX11-NEXT: s_bitset0_b32 s59, 0
|
|
; GFX11-NEXT: s_mov_b32 s54, s59
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: v_readlane_b32 s55, v21, 16
|
|
; GFX11-NEXT: v_readlane_b32 s54, v21, 15
|
|
; GFX11-NEXT: v_readlane_b32 s53, v21, 14
|
|
; GFX11-NEXT: v_readlane_b32 s52, v21, 13
|
|
; GFX11-NEXT: v_readlane_b32 s51, v21, 12
|
|
; GFX11-NEXT: v_readlane_b32 s50, v21, 11
|
|
; GFX11-NEXT: v_readlane_b32 s49, v21, 10
|
|
; GFX11-NEXT: v_readlane_b32 s48, v21, 9
|
|
; GFX11-NEXT: v_readlane_b32 s39, v21, 8
|
|
; GFX11-NEXT: v_readlane_b32 s38, v21, 7
|
|
; GFX11-NEXT: v_readlane_b32 s37, v21, 6
|
|
; GFX11-NEXT: v_readlane_b32 s36, v21, 5
|
|
; GFX11-NEXT: v_readlane_b32 s35, v21, 4
|
|
; GFX11-NEXT: v_readlane_b32 s34, v21, 3
|
|
; GFX11-NEXT: v_readlane_b32 s33, v21, 2
|
|
; GFX11-NEXT: v_readlane_b32 s31, v21, 1
|
|
; GFX11-NEXT: v_readlane_b32 s30, v21, 0
|
|
; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
|
|
; GFX11-NEXT: s_add_i32 s1, s32, 0x4010
|
|
; GFX11-NEXT: scratch_load_b32 v21, off, s1 ; 4-byte Folded Reload
|
|
; GFX11-NEXT: s_mov_b32 exec_lo, s0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX12-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset:
|
|
; GFX12: ; %bb.0:
|
|
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX12-NEXT: s_wait_expcnt 0x0
|
|
; GFX12-NEXT: s_wait_samplecnt 0x0
|
|
; GFX12-NEXT: s_wait_bvhcnt 0x0
|
|
; GFX12-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-NEXT: s_xor_saveexec_b32 s0, -1
|
|
; GFX12-NEXT: scratch_store_b32 off, v21, s32 offset:16384 ; 4-byte Folded Spill
|
|
; GFX12-NEXT: s_wait_alu 0xfffe
|
|
; GFX12-NEXT: s_mov_b32 exec_lo, s0
|
|
; GFX12-NEXT: v_writelane_b32 v21, s30, 0
|
|
; GFX12-NEXT: s_and_b32 s59, 0, exec_lo
|
|
; GFX12-NEXT: v_writelane_b32 v21, s31, 1
|
|
; GFX12-NEXT: v_writelane_b32 v21, s33, 2
|
|
; GFX12-NEXT: v_writelane_b32 v21, s34, 3
|
|
; GFX12-NEXT: v_writelane_b32 v21, s35, 4
|
|
; GFX12-NEXT: v_writelane_b32 v21, s36, 5
|
|
; GFX12-NEXT: v_writelane_b32 v21, s37, 6
|
|
; GFX12-NEXT: v_writelane_b32 v21, s38, 7
|
|
; GFX12-NEXT: v_writelane_b32 v21, s39, 8
|
|
; GFX12-NEXT: v_writelane_b32 v21, s48, 9
|
|
; GFX12-NEXT: v_writelane_b32 v21, s49, 10
|
|
; GFX12-NEXT: v_writelane_b32 v21, s50, 11
|
|
; GFX12-NEXT: v_writelane_b32 v21, s51, 12
|
|
; GFX12-NEXT: v_writelane_b32 v21, s52, 13
|
|
; GFX12-NEXT: v_writelane_b32 v21, s53, 14
|
|
; GFX12-NEXT: v_writelane_b32 v21, s54, 15
|
|
; GFX12-NEXT: v_writelane_b32 v21, s55, 16
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
|
|
; GFX12-NEXT: s_mov_b32 s54, s32
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX12-NEXT: v_readlane_b32 s55, v21, 16
|
|
; GFX12-NEXT: v_readlane_b32 s54, v21, 15
|
|
; GFX12-NEXT: v_readlane_b32 s53, v21, 14
|
|
; GFX12-NEXT: v_readlane_b32 s52, v21, 13
|
|
; GFX12-NEXT: v_readlane_b32 s51, v21, 12
|
|
; GFX12-NEXT: v_readlane_b32 s50, v21, 11
|
|
; GFX12-NEXT: v_readlane_b32 s49, v21, 10
|
|
; GFX12-NEXT: v_readlane_b32 s48, v21, 9
|
|
; GFX12-NEXT: v_readlane_b32 s39, v21, 8
|
|
; GFX12-NEXT: v_readlane_b32 s38, v21, 7
|
|
; GFX12-NEXT: v_readlane_b32 s37, v21, 6
|
|
; GFX12-NEXT: v_readlane_b32 s36, v21, 5
|
|
; GFX12-NEXT: v_readlane_b32 s35, v21, 4
|
|
; GFX12-NEXT: v_readlane_b32 s34, v21, 3
|
|
; GFX12-NEXT: v_readlane_b32 s33, v21, 2
|
|
; GFX12-NEXT: v_readlane_b32 s31, v21, 1
|
|
; GFX12-NEXT: v_readlane_b32 s30, v21, 0
|
|
; GFX12-NEXT: s_xor_saveexec_b32 s0, -1
|
|
; GFX12-NEXT: scratch_load_b32 v21, off, s32 offset:16384 ; 4-byte Folded Reload
|
|
; GFX12-NEXT: s_wait_alu 0xfffe
|
|
; GFX12-NEXT: s_mov_b32 exec_lo, s0
|
|
; GFX12-NEXT: s_wait_loadcnt 0x0
|
|
; GFX12-NEXT: s_setpc_b64 s[30:31]
|
|
%alloca0 = alloca [4096 x i32], align 16, addrspace(5)
|
|
|
|
; Force no SGPRs to be available for the carry-out of the vector add.
|
|
%asm = call %asm.output2 asm sideeffect
|
|
"; def $0, $1, $2, $3, $4, $5, $6, $7, $8",
|
|
"={s[0:15]},={s[16:31]},={s[32:47]},={s[48:55]},={s[56:57]},={s58},={v[0:15]},={v[16:20]},={vcc}"()
|
|
|
|
%s0 = extractvalue %asm.output2 %asm, 0
|
|
%s1 = extractvalue %asm.output2 %asm, 1
|
|
%s2 = extractvalue %asm.output2 %asm, 2
|
|
%s3 = extractvalue %asm.output2 %asm, 3
|
|
%s4 = extractvalue %asm.output2 %asm, 4
|
|
%s5 = extractvalue %asm.output2 %asm, 5
|
|
|
|
%v0 = extractvalue %asm.output2 %asm, 6
|
|
%v1 = extractvalue %asm.output2 %asm, 7
|
|
|
|
%vcc = extractvalue %asm.output2 %asm, 8
|
|
|
|
; scc is unavailable since it is live into the inline asm below (passed via the {scc} constraint)
|
|
call void asm sideeffect "; use $0, $1, $2, $3, $4, $5, $6, $7, $8, $9, $10",
|
|
"{s[0:15]},{s[16:31]},{s[32:47]},{s[48:55]},{s[56:57]},{s58},{v[0:15]},{v[16:20]},{vcc},{s54},{scc}"(
|
|
<16 x i32> %s0,
|
|
<16 x i32> %s1,
|
|
<16 x i32> %s2,
|
|
<8 x i32> %s3,
|
|
<2 x i32> %s4,
|
|
i32 %s5,
|
|
<16 x i32> %v0,
|
|
<5 x i32> %v1,
|
|
i64 %vcc,
|
|
ptr addrspace(5) %alloca0,
|
|
i32 0) ; use of scc
|
|
|
|
ret void
|
|
}
|
|
|
|
; This case isn't using SGPRs yet.
|
|
; FIXME: Should also use one more VGPR, but currently fails to allocate on gfx8.
|
|
define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset() #0 {
|
|
; GFX7-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset:
|
|
; GFX7: ; %bb.0:
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1
|
|
; GFX7-NEXT: s_add_i32 s6, s32, 0x201000
|
|
; GFX7-NEXT: buffer_store_dword v23, off, s[0:3], s6 ; 4-byte Folded Spill
|
|
; GFX7-NEXT: s_add_i32 s6, s32, 0x201100
|
|
; GFX7-NEXT: buffer_store_dword v22, off, s[0:3], s6 ; 4-byte Folded Spill
|
|
; GFX7-NEXT: s_mov_b64 exec, s[4:5]
|
|
; GFX7-NEXT: v_writelane_b32 v23, s28, 17
|
|
; GFX7-NEXT: v_writelane_b32 v23, s29, 18
|
|
; GFX7-NEXT: v_writelane_b32 v23, s30, 0
|
|
; GFX7-NEXT: v_writelane_b32 v23, s31, 1
|
|
; GFX7-NEXT: v_writelane_b32 v23, s33, 2
|
|
; GFX7-NEXT: v_writelane_b32 v23, s34, 3
|
|
; GFX7-NEXT: v_writelane_b32 v23, s35, 4
|
|
; GFX7-NEXT: v_writelane_b32 v23, s36, 5
|
|
; GFX7-NEXT: v_writelane_b32 v23, s37, 6
|
|
; GFX7-NEXT: v_writelane_b32 v23, s38, 7
|
|
; GFX7-NEXT: v_writelane_b32 v23, s39, 8
|
|
; GFX7-NEXT: v_writelane_b32 v23, s48, 9
|
|
; GFX7-NEXT: v_writelane_b32 v23, s49, 10
|
|
; GFX7-NEXT: v_writelane_b32 v23, s50, 11
|
|
; GFX7-NEXT: v_writelane_b32 v23, s51, 12
|
|
; GFX7-NEXT: v_writelane_b32 v23, s52, 13
|
|
; GFX7-NEXT: s_lshr_b32 s5, s32, 6
|
|
; GFX7-NEXT: v_writelane_b32 v23, s53, 14
|
|
; GFX7-NEXT: v_lshr_b32_e64 v0, s32, 6
|
|
; GFX7-NEXT: s_add_i32 s4, s5, 0x4240
|
|
; GFX7-NEXT: ; implicit-def: $vgpr22 : SGPR spill to VGPR lane
|
|
; GFX7-NEXT: v_writelane_b32 v23, s54, 15
|
|
; GFX7-NEXT: v_add_i32_e32 v0, vcc, 64, v0
|
|
; GFX7-NEXT: v_writelane_b32 v22, s4, 0
|
|
; GFX7-NEXT: s_and_b64 s[4:5], 0, exec
|
|
; GFX7-NEXT: v_writelane_b32 v23, s55, 16
|
|
; GFX7-NEXT: ;;#ASMSTART
|
|
; GFX7-NEXT: ; use alloca0 v0
|
|
; GFX7-NEXT: ;;#ASMEND
|
|
; GFX7-NEXT: ;;#ASMSTART
|
|
; GFX7-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
|
|
; GFX7-NEXT: ;;#ASMEND
|
|
; GFX7-NEXT: v_readlane_b32 s54, v22, 0
|
|
; GFX7-NEXT: ;;#ASMSTART
|
|
; GFX7-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc
|
|
; GFX7-NEXT: ;;#ASMEND
|
|
; GFX7-NEXT: v_readlane_b32 s55, v23, 16
|
|
; GFX7-NEXT: v_readlane_b32 s54, v23, 15
|
|
; GFX7-NEXT: v_readlane_b32 s53, v23, 14
|
|
; GFX7-NEXT: v_readlane_b32 s52, v23, 13
|
|
; GFX7-NEXT: v_readlane_b32 s51, v23, 12
|
|
; GFX7-NEXT: v_readlane_b32 s50, v23, 11
|
|
; GFX7-NEXT: v_readlane_b32 s49, v23, 10
|
|
; GFX7-NEXT: v_readlane_b32 s48, v23, 9
|
|
; GFX7-NEXT: v_readlane_b32 s39, v23, 8
|
|
; GFX7-NEXT: v_readlane_b32 s38, v23, 7
|
|
; GFX7-NEXT: v_readlane_b32 s37, v23, 6
|
|
; GFX7-NEXT: v_readlane_b32 s36, v23, 5
|
|
; GFX7-NEXT: v_readlane_b32 s35, v23, 4
|
|
; GFX7-NEXT: v_readlane_b32 s34, v23, 3
|
|
; GFX7-NEXT: v_readlane_b32 s33, v23, 2
|
|
; GFX7-NEXT: v_readlane_b32 s31, v23, 1
|
|
; GFX7-NEXT: v_readlane_b32 s30, v23, 0
|
|
; GFX7-NEXT: v_readlane_b32 s28, v23, 17
|
|
; GFX7-NEXT: v_readlane_b32 s29, v23, 18
|
|
; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1
|
|
; GFX7-NEXT: s_add_i32 s6, s32, 0x201000
|
|
; GFX7-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload
|
|
; GFX7-NEXT: s_add_i32 s6, s32, 0x201100
|
|
; GFX7-NEXT: buffer_load_dword v22, off, s[0:3], s6 ; 4-byte Folded Reload
|
|
; GFX7-NEXT: s_mov_b64 exec, s[4:5]
|
|
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX8-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
|
|
; GFX8-NEXT: s_add_i32 s6, s32, 0x201000
|
|
; GFX8-NEXT: buffer_store_dword v23, off, s[0:3], s6 ; 4-byte Folded Spill
|
|
; GFX8-NEXT: s_add_i32 s6, s32, 0x201100
|
|
; GFX8-NEXT: buffer_store_dword v22, off, s[0:3], s6 ; 4-byte Folded Spill
|
|
; GFX8-NEXT: s_mov_b64 exec, s[4:5]
|
|
; GFX8-NEXT: v_writelane_b32 v23, s30, 0
|
|
; GFX8-NEXT: v_writelane_b32 v23, s31, 1
|
|
; GFX8-NEXT: v_writelane_b32 v23, s33, 2
|
|
; GFX8-NEXT: v_writelane_b32 v23, s34, 3
|
|
; GFX8-NEXT: v_writelane_b32 v23, s35, 4
|
|
; GFX8-NEXT: v_writelane_b32 v23, s36, 5
|
|
; GFX8-NEXT: v_writelane_b32 v23, s37, 6
|
|
; GFX8-NEXT: v_writelane_b32 v23, s38, 7
|
|
; GFX8-NEXT: v_writelane_b32 v23, s39, 8
|
|
; GFX8-NEXT: v_writelane_b32 v23, s48, 9
|
|
; GFX8-NEXT: v_writelane_b32 v23, s49, 10
|
|
; GFX8-NEXT: v_writelane_b32 v23, s50, 11
|
|
; GFX8-NEXT: v_writelane_b32 v23, s51, 12
|
|
; GFX8-NEXT: v_writelane_b32 v23, s52, 13
|
|
; GFX8-NEXT: s_lshr_b32 s5, s32, 6
|
|
; GFX8-NEXT: v_writelane_b32 v23, s53, 14
|
|
; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32
|
|
; GFX8-NEXT: s_add_i32 s4, s5, 0x4240
|
|
; GFX8-NEXT: ; implicit-def: $vgpr22 : SGPR spill to VGPR lane
|
|
; GFX8-NEXT: v_writelane_b32 v23, s54, 15
|
|
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0
|
|
; GFX8-NEXT: v_writelane_b32 v22, s4, 0
|
|
; GFX8-NEXT: s_and_b64 s[4:5], 0, exec
|
|
; GFX8-NEXT: v_writelane_b32 v23, s55, 16
|
|
; GFX8-NEXT: ;;#ASMSTART
|
|
; GFX8-NEXT: ; use alloca0 v0
|
|
; GFX8-NEXT: ;;#ASMEND
|
|
; GFX8-NEXT: ;;#ASMSTART
|
|
; GFX8-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
|
|
; GFX8-NEXT: ;;#ASMEND
|
|
; GFX8-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
|
|
; GFX8-NEXT: v_readlane_b32 s54, v22, 0
|
|
; GFX8-NEXT: ;;#ASMSTART
|
|
; GFX8-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc
|
|
; GFX8-NEXT: ;;#ASMEND
|
|
; GFX8-NEXT: v_readlane_b32 s55, v23, 16
|
|
; GFX8-NEXT: v_readlane_b32 s54, v23, 15
|
|
; GFX8-NEXT: v_readlane_b32 s53, v23, 14
|
|
; GFX8-NEXT: v_readlane_b32 s52, v23, 13
|
|
; GFX8-NEXT: v_readlane_b32 s51, v23, 12
|
|
; GFX8-NEXT: v_readlane_b32 s50, v23, 11
|
|
; GFX8-NEXT: v_readlane_b32 s49, v23, 10
|
|
; GFX8-NEXT: v_readlane_b32 s48, v23, 9
|
|
; GFX8-NEXT: v_readlane_b32 s39, v23, 8
|
|
; GFX8-NEXT: v_readlane_b32 s38, v23, 7
|
|
; GFX8-NEXT: v_readlane_b32 s37, v23, 6
|
|
; GFX8-NEXT: v_readlane_b32 s36, v23, 5
|
|
; GFX8-NEXT: v_readlane_b32 s35, v23, 4
|
|
; GFX8-NEXT: v_readlane_b32 s34, v23, 3
|
|
; GFX8-NEXT: v_readlane_b32 s33, v23, 2
|
|
; GFX8-NEXT: v_readlane_b32 s31, v23, 1
|
|
; GFX8-NEXT: v_readlane_b32 s30, v23, 0
|
|
; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
|
|
; GFX8-NEXT: s_add_i32 s6, s32, 0x201000
|
|
; GFX8-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload
|
|
; GFX8-NEXT: s_add_i32 s6, s32, 0x201100
|
|
; GFX8-NEXT: buffer_load_dword v22, off, s[0:3], s6 ; 4-byte Folded Reload
|
|
; GFX8-NEXT: s_mov_b64 exec, s[4:5]
|
|
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX900-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset:
|
|
; GFX900: ; %bb.0:
|
|
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
|
|
; GFX900-NEXT: s_add_i32 s6, s32, 0x201000
|
|
; GFX900-NEXT: buffer_store_dword v23, off, s[0:3], s6 ; 4-byte Folded Spill
|
|
; GFX900-NEXT: s_add_i32 s6, s32, 0x201100
|
|
; GFX900-NEXT: buffer_store_dword v22, off, s[0:3], s6 ; 4-byte Folded Spill
|
|
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
|
|
; GFX900-NEXT: v_writelane_b32 v23, s30, 0
|
|
; GFX900-NEXT: v_writelane_b32 v23, s31, 1
|
|
; GFX900-NEXT: v_writelane_b32 v23, s33, 2
|
|
; GFX900-NEXT: v_writelane_b32 v23, s34, 3
|
|
; GFX900-NEXT: v_writelane_b32 v23, s35, 4
|
|
; GFX900-NEXT: v_writelane_b32 v23, s36, 5
|
|
; GFX900-NEXT: v_writelane_b32 v23, s37, 6
|
|
; GFX900-NEXT: v_writelane_b32 v23, s38, 7
|
|
; GFX900-NEXT: v_writelane_b32 v23, s39, 8
|
|
; GFX900-NEXT: v_writelane_b32 v23, s48, 9
|
|
; GFX900-NEXT: v_writelane_b32 v23, s49, 10
|
|
; GFX900-NEXT: v_writelane_b32 v23, s50, 11
|
|
; GFX900-NEXT: v_writelane_b32 v23, s51, 12
|
|
; GFX900-NEXT: v_writelane_b32 v23, s52, 13
|
|
; GFX900-NEXT: s_lshr_b32 s5, s32, 6
|
|
; GFX900-NEXT: v_writelane_b32 v23, s53, 14
|
|
; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32
|
|
; GFX900-NEXT: s_add_i32 s4, s5, 0x4240
|
|
; GFX900-NEXT: ; implicit-def: $vgpr22 : SGPR spill to VGPR lane
|
|
; GFX900-NEXT: v_writelane_b32 v23, s54, 15
|
|
; GFX900-NEXT: v_add_u32_e32 v0, 64, v0
|
|
; GFX900-NEXT: v_writelane_b32 v22, s4, 0
|
|
; GFX900-NEXT: s_and_b64 s[4:5], 0, exec
|
|
; GFX900-NEXT: v_writelane_b32 v23, s55, 16
|
|
; GFX900-NEXT: ;;#ASMSTART
|
|
; GFX900-NEXT: ; use alloca0 v0
|
|
; GFX900-NEXT: ;;#ASMEND
|
|
; GFX900-NEXT: ;;#ASMSTART
|
|
; GFX900-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
|
|
; GFX900-NEXT: ;;#ASMEND
|
|
; GFX900-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
|
|
; GFX900-NEXT: v_readlane_b32 s54, v22, 0
|
|
; GFX900-NEXT: ;;#ASMSTART
|
|
; GFX900-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc
|
|
; GFX900-NEXT: ;;#ASMEND
|
|
; GFX900-NEXT: v_readlane_b32 s55, v23, 16
|
|
; GFX900-NEXT: v_readlane_b32 s54, v23, 15
|
|
; GFX900-NEXT: v_readlane_b32 s53, v23, 14
|
|
; GFX900-NEXT: v_readlane_b32 s52, v23, 13
|
|
; GFX900-NEXT: v_readlane_b32 s51, v23, 12
|
|
; GFX900-NEXT: v_readlane_b32 s50, v23, 11
|
|
; GFX900-NEXT: v_readlane_b32 s49, v23, 10
|
|
; GFX900-NEXT: v_readlane_b32 s48, v23, 9
|
|
; GFX900-NEXT: v_readlane_b32 s39, v23, 8
|
|
; GFX900-NEXT: v_readlane_b32 s38, v23, 7
|
|
; GFX900-NEXT: v_readlane_b32 s37, v23, 6
|
|
; GFX900-NEXT: v_readlane_b32 s36, v23, 5
|
|
; GFX900-NEXT: v_readlane_b32 s35, v23, 4
|
|
; GFX900-NEXT: v_readlane_b32 s34, v23, 3
|
|
; GFX900-NEXT: v_readlane_b32 s33, v23, 2
|
|
; GFX900-NEXT: v_readlane_b32 s31, v23, 1
|
|
; GFX900-NEXT: v_readlane_b32 s30, v23, 0
|
|
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
|
|
; GFX900-NEXT: s_add_i32 s6, s32, 0x201000
|
|
; GFX900-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload
|
|
; GFX900-NEXT: s_add_i32 s6, s32, 0x201100
|
|
; GFX900-NEXT: buffer_load_dword v22, off, s[0:3], s6 ; 4-byte Folded Reload
|
|
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
|
|
; GFX900-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX900-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX942-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset:
|
|
; GFX942: ; %bb.0:
|
|
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
|
|
; GFX942-NEXT: s_add_i32 s2, s32, 0x8040
|
|
; GFX942-NEXT: scratch_store_dword off, v22, s2 ; 4-byte Folded Spill
|
|
; GFX942-NEXT: s_mov_b64 exec, s[0:1]
|
|
; GFX942-NEXT: v_writelane_b32 v22, s30, 0
|
|
; GFX942-NEXT: v_writelane_b32 v22, s31, 1
|
|
; GFX942-NEXT: v_writelane_b32 v22, s33, 2
|
|
; GFX942-NEXT: v_writelane_b32 v22, s34, 3
|
|
; GFX942-NEXT: v_writelane_b32 v22, s35, 4
|
|
; GFX942-NEXT: v_writelane_b32 v22, s36, 5
|
|
; GFX942-NEXT: v_writelane_b32 v22, s37, 6
|
|
; GFX942-NEXT: v_writelane_b32 v22, s38, 7
|
|
; GFX942-NEXT: v_writelane_b32 v22, s39, 8
|
|
; GFX942-NEXT: v_writelane_b32 v22, s48, 9
|
|
; GFX942-NEXT: v_writelane_b32 v22, s49, 10
|
|
; GFX942-NEXT: v_writelane_b32 v22, s50, 11
|
|
; GFX942-NEXT: v_writelane_b32 v22, s51, 12
|
|
; GFX942-NEXT: v_writelane_b32 v22, s52, 13
|
|
; GFX942-NEXT: v_writelane_b32 v22, s53, 14
|
|
; GFX942-NEXT: s_add_i32 s0, s32, 64
|
|
; GFX942-NEXT: v_writelane_b32 v22, s54, 15
|
|
; GFX942-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX942-NEXT: v_writelane_b32 v22, s55, 16
|
|
; GFX942-NEXT: ;;#ASMSTART
|
|
; GFX942-NEXT: ; use alloca0 v0
|
|
; GFX942-NEXT: ;;#ASMEND
|
|
; GFX942-NEXT: ;;#ASMSTART
|
|
; GFX942-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
|
|
; GFX942-NEXT: ;;#ASMEND
|
|
; GFX942-NEXT: s_add_i32 s58, s32, 0x4240
|
|
; GFX942-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
|
|
; GFX942-NEXT: s_and_b64 s[60:61], 0, exec
|
|
; GFX942-NEXT: s_mov_b32 s54, s58
|
|
; GFX942-NEXT: ;;#ASMSTART
|
|
; GFX942-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc
|
|
; GFX942-NEXT: ;;#ASMEND
|
|
; GFX942-NEXT: v_readlane_b32 s55, v22, 16
|
|
; GFX942-NEXT: v_readlane_b32 s54, v22, 15
|
|
; GFX942-NEXT: v_readlane_b32 s53, v22, 14
|
|
; GFX942-NEXT: v_readlane_b32 s52, v22, 13
|
|
; GFX942-NEXT: v_readlane_b32 s51, v22, 12
|
|
; GFX942-NEXT: v_readlane_b32 s50, v22, 11
|
|
; GFX942-NEXT: v_readlane_b32 s49, v22, 10
|
|
; GFX942-NEXT: v_readlane_b32 s48, v22, 9
|
|
; GFX942-NEXT: v_readlane_b32 s39, v22, 8
|
|
; GFX942-NEXT: v_readlane_b32 s38, v22, 7
|
|
; GFX942-NEXT: v_readlane_b32 s37, v22, 6
|
|
; GFX942-NEXT: v_readlane_b32 s36, v22, 5
|
|
; GFX942-NEXT: v_readlane_b32 s35, v22, 4
|
|
; GFX942-NEXT: v_readlane_b32 s34, v22, 3
|
|
; GFX942-NEXT: v_readlane_b32 s33, v22, 2
|
|
; GFX942-NEXT: v_readlane_b32 s31, v22, 1
|
|
; GFX942-NEXT: v_readlane_b32 s30, v22, 0
|
|
; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
|
|
; GFX942-NEXT: s_add_i32 s2, s32, 0x8040
|
|
; GFX942-NEXT: scratch_load_dword v22, off, s2 ; 4-byte Folded Reload
|
|
; GFX942-NEXT: s_mov_b64 exec, s[0:1]
|
|
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX942-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10_1-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset:
|
|
; GFX10_1: ; %bb.0:
|
|
; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1
|
|
; GFX10_1-NEXT: s_add_i32 s5, s32, 0x100800
|
|
; GFX10_1-NEXT: buffer_store_dword v22, off, s[0:3], s5 ; 4-byte Folded Spill
|
|
; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
|
|
; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
|
|
; GFX10_1-NEXT: v_writelane_b32 v22, s30, 0
|
|
; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32
|
|
; GFX10_1-NEXT: s_lshr_b32 s4, s32, 5
|
|
; GFX10_1-NEXT: s_add_i32 s58, s4, 0x4240
|
|
; GFX10_1-NEXT: v_writelane_b32 v22, s31, 1
|
|
; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0
|
|
; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo
|
|
; GFX10_1-NEXT: ;;#ASMSTART
|
|
; GFX10_1-NEXT: ; use alloca0 v0
|
|
; GFX10_1-NEXT: ;;#ASMEND
|
|
; GFX10_1-NEXT: v_writelane_b32 v22, s33, 2
|
|
; GFX10_1-NEXT: v_writelane_b32 v22, s34, 3
|
|
; GFX10_1-NEXT: v_writelane_b32 v22, s35, 4
|
|
; GFX10_1-NEXT: v_writelane_b32 v22, s36, 5
|
|
; GFX10_1-NEXT: v_writelane_b32 v22, s37, 6
|
|
; GFX10_1-NEXT: v_writelane_b32 v22, s38, 7
|
|
; GFX10_1-NEXT: v_writelane_b32 v22, s39, 8
|
|
; GFX10_1-NEXT: v_writelane_b32 v22, s48, 9
|
|
; GFX10_1-NEXT: v_writelane_b32 v22, s49, 10
|
|
; GFX10_1-NEXT: v_writelane_b32 v22, s50, 11
|
|
; GFX10_1-NEXT: v_writelane_b32 v22, s51, 12
|
|
; GFX10_1-NEXT: v_writelane_b32 v22, s52, 13
|
|
; GFX10_1-NEXT: v_writelane_b32 v22, s53, 14
|
|
; GFX10_1-NEXT: v_writelane_b32 v22, s54, 15
|
|
; GFX10_1-NEXT: v_writelane_b32 v22, s55, 16
|
|
; GFX10_1-NEXT: ;;#ASMSTART
|
|
; GFX10_1-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
|
|
; GFX10_1-NEXT: ;;#ASMEND
|
|
; GFX10_1-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
|
|
; GFX10_1-NEXT: s_mov_b32 s54, s58
|
|
; GFX10_1-NEXT: ;;#ASMSTART
|
|
; GFX10_1-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc
|
|
; GFX10_1-NEXT: ;;#ASMEND
|
|
; GFX10_1-NEXT: v_readlane_b32 s55, v22, 16
|
|
; GFX10_1-NEXT: v_readlane_b32 s54, v22, 15
|
|
; GFX10_1-NEXT: v_readlane_b32 s53, v22, 14
|
|
; GFX10_1-NEXT: v_readlane_b32 s52, v22, 13
|
|
; GFX10_1-NEXT: v_readlane_b32 s51, v22, 12
|
|
; GFX10_1-NEXT: v_readlane_b32 s50, v22, 11
|
|
; GFX10_1-NEXT: v_readlane_b32 s49, v22, 10
|
|
; GFX10_1-NEXT: v_readlane_b32 s48, v22, 9
|
|
; GFX10_1-NEXT: v_readlane_b32 s39, v22, 8
|
|
; GFX10_1-NEXT: v_readlane_b32 s38, v22, 7
|
|
; GFX10_1-NEXT: v_readlane_b32 s37, v22, 6
|
|
; GFX10_1-NEXT: v_readlane_b32 s36, v22, 5
|
|
; GFX10_1-NEXT: v_readlane_b32 s35, v22, 4
|
|
; GFX10_1-NEXT: v_readlane_b32 s34, v22, 3
|
|
; GFX10_1-NEXT: v_readlane_b32 s33, v22, 2
|
|
; GFX10_1-NEXT: v_readlane_b32 s31, v22, 1
|
|
; GFX10_1-NEXT: v_readlane_b32 s30, v22, 0
|
|
; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1
|
|
; GFX10_1-NEXT: s_add_i32 s5, s32, 0x100800
|
|
; GFX10_1-NEXT: buffer_load_dword v22, off, s[0:3], s5 ; 4-byte Folded Reload
|
|
; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
|
|
; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
|
|
; GFX10_1-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10_1-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10_3-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset:
|
|
; GFX10_3: ; %bb.0:
|
|
; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1
|
|
; GFX10_3-NEXT: s_add_i32 s5, s32, 0x100800
|
|
; GFX10_3-NEXT: buffer_store_dword v22, off, s[0:3], s5 ; 4-byte Folded Spill
|
|
; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
|
|
; GFX10_3-NEXT: v_writelane_b32 v22, s30, 0
|
|
; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32
|
|
; GFX10_3-NEXT: s_lshr_b32 s4, s32, 5
|
|
; GFX10_3-NEXT: s_add_i32 s58, s4, 0x4240
|
|
; GFX10_3-NEXT: v_writelane_b32 v22, s31, 1
|
|
; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0
|
|
; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo
|
|
; GFX10_3-NEXT: ;;#ASMSTART
|
|
; GFX10_3-NEXT: ; use alloca0 v0
|
|
; GFX10_3-NEXT: ;;#ASMEND
|
|
; GFX10_3-NEXT: v_writelane_b32 v22, s33, 2
|
|
; GFX10_3-NEXT: v_writelane_b32 v22, s34, 3
|
|
; GFX10_3-NEXT: v_writelane_b32 v22, s35, 4
|
|
; GFX10_3-NEXT: v_writelane_b32 v22, s36, 5
|
|
; GFX10_3-NEXT: v_writelane_b32 v22, s37, 6
|
|
; GFX10_3-NEXT: v_writelane_b32 v22, s38, 7
|
|
; GFX10_3-NEXT: v_writelane_b32 v22, s39, 8
|
|
; GFX10_3-NEXT: v_writelane_b32 v22, s48, 9
|
|
; GFX10_3-NEXT: v_writelane_b32 v22, s49, 10
|
|
; GFX10_3-NEXT: v_writelane_b32 v22, s50, 11
|
|
; GFX10_3-NEXT: v_writelane_b32 v22, s51, 12
|
|
; GFX10_3-NEXT: v_writelane_b32 v22, s52, 13
|
|
; GFX10_3-NEXT: v_writelane_b32 v22, s53, 14
|
|
; GFX10_3-NEXT: v_writelane_b32 v22, s54, 15
|
|
; GFX10_3-NEXT: v_writelane_b32 v22, s55, 16
|
|
; GFX10_3-NEXT: ;;#ASMSTART
|
|
; GFX10_3-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
|
|
; GFX10_3-NEXT: ;;#ASMEND
|
|
; GFX10_3-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
|
|
; GFX10_3-NEXT: s_mov_b32 s54, s58
|
|
; GFX10_3-NEXT: ;;#ASMSTART
|
|
; GFX10_3-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc
|
|
; GFX10_3-NEXT: ;;#ASMEND
|
|
; GFX10_3-NEXT: v_readlane_b32 s55, v22, 16
|
|
; GFX10_3-NEXT: v_readlane_b32 s54, v22, 15
|
|
; GFX10_3-NEXT: v_readlane_b32 s53, v22, 14
|
|
; GFX10_3-NEXT: v_readlane_b32 s52, v22, 13
|
|
; GFX10_3-NEXT: v_readlane_b32 s51, v22, 12
|
|
; GFX10_3-NEXT: v_readlane_b32 s50, v22, 11
|
|
; GFX10_3-NEXT: v_readlane_b32 s49, v22, 10
|
|
; GFX10_3-NEXT: v_readlane_b32 s48, v22, 9
|
|
; GFX10_3-NEXT: v_readlane_b32 s39, v22, 8
|
|
; GFX10_3-NEXT: v_readlane_b32 s38, v22, 7
|
|
; GFX10_3-NEXT: v_readlane_b32 s37, v22, 6
|
|
; GFX10_3-NEXT: v_readlane_b32 s36, v22, 5
|
|
; GFX10_3-NEXT: v_readlane_b32 s35, v22, 4
|
|
; GFX10_3-NEXT: v_readlane_b32 s34, v22, 3
|
|
; GFX10_3-NEXT: v_readlane_b32 s33, v22, 2
|
|
; GFX10_3-NEXT: v_readlane_b32 s31, v22, 1
|
|
; GFX10_3-NEXT: v_readlane_b32 s30, v22, 0
|
|
; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1
|
|
; GFX10_3-NEXT: s_add_i32 s5, s32, 0x100800
|
|
; GFX10_3-NEXT: buffer_load_dword v22, off, s[0:3], s5 ; 4-byte Folded Reload
|
|
; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
|
|
; GFX10_3-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10_3-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
|
|
; GFX11-NEXT: s_add_i32 s1, s32, 0x8040
|
|
; GFX11-NEXT: scratch_store_b32 off, v22, s1 ; 4-byte Folded Spill
|
|
; GFX11-NEXT: s_mov_b32 exec_lo, s0
|
|
; GFX11-NEXT: v_writelane_b32 v22, s30, 0
|
|
; GFX11-NEXT: s_add_i32 s0, s32, 64
|
|
; GFX11-NEXT: s_add_i32 s58, s32, 0x4240
|
|
; GFX11-NEXT: v_mov_b32_e32 v0, s0
|
|
; GFX11-NEXT: s_and_b32 s0, 0, exec_lo
|
|
; GFX11-NEXT: v_writelane_b32 v22, s31, 1
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use alloca0 v0
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: v_writelane_b32 v22, s33, 2
|
|
; GFX11-NEXT: v_writelane_b32 v22, s34, 3
|
|
; GFX11-NEXT: v_writelane_b32 v22, s35, 4
|
|
; GFX11-NEXT: v_writelane_b32 v22, s36, 5
|
|
; GFX11-NEXT: v_writelane_b32 v22, s37, 6
|
|
; GFX11-NEXT: v_writelane_b32 v22, s38, 7
|
|
; GFX11-NEXT: v_writelane_b32 v22, s39, 8
|
|
; GFX11-NEXT: v_writelane_b32 v22, s48, 9
|
|
; GFX11-NEXT: v_writelane_b32 v22, s49, 10
|
|
; GFX11-NEXT: v_writelane_b32 v22, s50, 11
|
|
; GFX11-NEXT: v_writelane_b32 v22, s51, 12
|
|
; GFX11-NEXT: v_writelane_b32 v22, s52, 13
|
|
; GFX11-NEXT: v_writelane_b32 v22, s53, 14
|
|
; GFX11-NEXT: v_writelane_b32 v22, s54, 15
|
|
; GFX11-NEXT: v_writelane_b32 v22, s55, 16
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
|
|
; GFX11-NEXT: s_mov_b32 s54, s58
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX11-NEXT: v_readlane_b32 s55, v22, 16
|
|
; GFX11-NEXT: v_readlane_b32 s54, v22, 15
|
|
; GFX11-NEXT: v_readlane_b32 s53, v22, 14
|
|
; GFX11-NEXT: v_readlane_b32 s52, v22, 13
|
|
; GFX11-NEXT: v_readlane_b32 s51, v22, 12
|
|
; GFX11-NEXT: v_readlane_b32 s50, v22, 11
|
|
; GFX11-NEXT: v_readlane_b32 s49, v22, 10
|
|
; GFX11-NEXT: v_readlane_b32 s48, v22, 9
|
|
; GFX11-NEXT: v_readlane_b32 s39, v22, 8
|
|
; GFX11-NEXT: v_readlane_b32 s38, v22, 7
|
|
; GFX11-NEXT: v_readlane_b32 s37, v22, 6
|
|
; GFX11-NEXT: v_readlane_b32 s36, v22, 5
|
|
; GFX11-NEXT: v_readlane_b32 s35, v22, 4
|
|
; GFX11-NEXT: v_readlane_b32 s34, v22, 3
|
|
; GFX11-NEXT: v_readlane_b32 s33, v22, 2
|
|
; GFX11-NEXT: v_readlane_b32 s31, v22, 1
|
|
; GFX11-NEXT: v_readlane_b32 s30, v22, 0
|
|
; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
|
|
; GFX11-NEXT: s_add_i32 s1, s32, 0x8040
|
|
; GFX11-NEXT: scratch_load_b32 v22, off, s1 ; 4-byte Folded Reload
|
|
; GFX11-NEXT: s_mov_b32 exec_lo, s0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX12-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset:
|
|
; GFX12: ; %bb.0:
|
|
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX12-NEXT: s_wait_expcnt 0x0
|
|
; GFX12-NEXT: s_wait_samplecnt 0x0
|
|
; GFX12-NEXT: s_wait_bvhcnt 0x0
|
|
; GFX12-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-NEXT: s_xor_saveexec_b32 s0, -1
|
|
; GFX12-NEXT: scratch_store_b32 off, v22, s32 offset:32768 ; 4-byte Folded Spill
|
|
; GFX12-NEXT: s_wait_alu 0xfffe
|
|
; GFX12-NEXT: s_mov_b32 exec_lo, s0
|
|
; GFX12-NEXT: v_writelane_b32 v22, s30, 0
|
|
; GFX12-NEXT: s_add_co_i32 s58, s32, 0x4200
|
|
; GFX12-NEXT: v_mov_b32_e32 v0, s32
|
|
; GFX12-NEXT: s_and_b32 s0, 0, exec_lo
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use alloca0 v0
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: v_writelane_b32 v22, s31, 1
|
|
; GFX12-NEXT: v_writelane_b32 v22, s33, 2
|
|
; GFX12-NEXT: v_writelane_b32 v22, s34, 3
|
|
; GFX12-NEXT: v_writelane_b32 v22, s35, 4
|
|
; GFX12-NEXT: v_writelane_b32 v22, s36, 5
|
|
; GFX12-NEXT: v_writelane_b32 v22, s37, 6
|
|
; GFX12-NEXT: v_writelane_b32 v22, s38, 7
|
|
; GFX12-NEXT: v_writelane_b32 v22, s39, 8
|
|
; GFX12-NEXT: v_writelane_b32 v22, s48, 9
|
|
; GFX12-NEXT: v_writelane_b32 v22, s49, 10
|
|
; GFX12-NEXT: v_writelane_b32 v22, s50, 11
|
|
; GFX12-NEXT: v_writelane_b32 v22, s51, 12
|
|
; GFX12-NEXT: v_writelane_b32 v22, s52, 13
|
|
; GFX12-NEXT: v_writelane_b32 v22, s53, 14
|
|
; GFX12-NEXT: v_writelane_b32 v22, s54, 15
|
|
; GFX12-NEXT: v_writelane_b32 v22, s55, 16
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
|
|
; GFX12-NEXT: s_wait_alu 0xfffe
|
|
; GFX12-NEXT: s_mov_b32 s54, s58
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: v_readlane_b32 s55, v22, 16
|
|
; GFX12-NEXT: v_readlane_b32 s54, v22, 15
|
|
; GFX12-NEXT: v_readlane_b32 s53, v22, 14
|
|
; GFX12-NEXT: v_readlane_b32 s52, v22, 13
|
|
; GFX12-NEXT: v_readlane_b32 s51, v22, 12
|
|
; GFX12-NEXT: v_readlane_b32 s50, v22, 11
|
|
; GFX12-NEXT: v_readlane_b32 s49, v22, 10
|
|
; GFX12-NEXT: v_readlane_b32 s48, v22, 9
|
|
; GFX12-NEXT: v_readlane_b32 s39, v22, 8
|
|
; GFX12-NEXT: v_readlane_b32 s38, v22, 7
|
|
; GFX12-NEXT: v_readlane_b32 s37, v22, 6
|
|
; GFX12-NEXT: v_readlane_b32 s36, v22, 5
|
|
; GFX12-NEXT: v_readlane_b32 s35, v22, 4
|
|
; GFX12-NEXT: v_readlane_b32 s34, v22, 3
|
|
; GFX12-NEXT: v_readlane_b32 s33, v22, 2
|
|
; GFX12-NEXT: v_readlane_b32 s31, v22, 1
|
|
; GFX12-NEXT: v_readlane_b32 s30, v22, 0
|
|
; GFX12-NEXT: s_xor_saveexec_b32 s0, -1
|
|
; GFX12-NEXT: scratch_load_b32 v22, off, s32 offset:32768 ; 4-byte Folded Reload
|
|
; GFX12-NEXT: s_wait_alu 0xfffe
|
|
; GFX12-NEXT: s_mov_b32 exec_lo, s0
|
|
; GFX12-NEXT: s_wait_loadcnt 0x0
|
|
; GFX12-NEXT: s_setpc_b64 s[30:31]
|
|
%alloca0 = alloca [4096 x i32], align 64, addrspace(5)
|
|
%alloca1 = alloca [4096 x i32], align 4, addrspace(5)
|
|
call void asm sideeffect "; use alloca0 $0", "v"(ptr addrspace(5) %alloca0)
|
|
|
|
; Force no SGPRs to be available for the carry-out of the vector add.
|
|
%asm = call %asm.output3 asm sideeffect
|
|
"; def $0, $1, $2, $3, $4, $5, $6, $7",
|
|
"={s[0:15]},={s[16:31]},={s[32:47]},={s[48:55]},={s[56:57]},={v[0:15]},={v[16:21]},={vcc}"()
|
|
|
|
%s0 = extractvalue %asm.output3 %asm, 0
|
|
%s1 = extractvalue %asm.output3 %asm, 1
|
|
%s2 = extractvalue %asm.output3 %asm, 2
|
|
%s3 = extractvalue %asm.output3 %asm, 3
|
|
%s4 = extractvalue %asm.output3 %asm, 4
|
|
|
|
%v0 = extractvalue %asm.output3 %asm, 5
|
|
%v1 = extractvalue %asm.output3 %asm, 6
|
|
|
|
%vcc = extractvalue %asm.output3 %asm, 7
|
|
|
|
%alloca1.offset = getelementptr [4096 x i32], ptr addrspace(5) %alloca1, i32 0, i32 128
|
|
|
|
; scc is unavailable since it is live into the inline asm below (its {scc} operand).
|
|
call void asm sideeffect "; use $0, $1, $2, $3, $4, $5, $6, $7, $8, $9",
|
|
"{s[0:15]},{s[16:31]},{s[32:47]},{s[48:55]},{s[56:57]},{v[0:15]},{v[16:21]},{vcc},{s54},{scc}"(
|
|
<16 x i32> %s0,
|
|
<16 x i32> %s1,
|
|
<16 x i32> %s2,
|
|
<8 x i32> %s3,
|
|
<2 x i32> %s4,
|
|
<16 x i32> %v0,
|
|
<6 x i32> %v1,
|
|
i64 %vcc,
|
|
ptr addrspace(5) %alloca1.offset,
|
|
i32 0) ; use of scc
|
|
|
|
ret void
|
|
}
|
|
|
|
; For gfx8/gfx9, this should enforce a budget of 24 VGPRs, and 60 SGPRs (4
|
|
; are reserved at the end for xnack + vcc).
; Both attribute groups below list the same attributes, including
; "amdgpu-waves-per-eu"="10,10" and "no-realign-stack"; they differ only in
; their alignstack value.
|
|
; Attribute group #0: alignstack=64.
attributes #0 = { nounwind alignstack=64 "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="10,10" "no-realign-stack" }
|
|
; Attribute group #1: same as #0 except alignstack=16.
attributes #1 = { nounwind alignstack=16 "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="10,10" "no-realign-stack" }
|