[AMDGPU] Do not fold an immediate into instructions with frame indexes (#151263)

Do not fold an immediate into an instruction that already has a frame
index operand. A frame index could possibly turn out to be another immediate.

Fixes: SWDEV-536263

---------

Co-authored-by: Matt Arsenault <arsenm2@gmail.com>
This commit is contained in:
Changpeng Fang 2025-08-06 11:47:37 -07:00 committed by GitHub
parent 35bd40d321
commit 32161e9de3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 220 additions and 50 deletions

View File

@ -6122,10 +6122,11 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
!Op.isIdenticalTo(*MO))
return false;
// Do not fold a frame index into an instruction that already has a frame
// index. The frame index handling code doesn't handle fixing up operand
// constraints if there are multiple indexes.
if (Op.isFI() && MO->isFI())
// Do not fold a non-inlineable and non-register operand into an
// instruction that already has a frame index. The frame index handling
// code could not handle well when a frame index co-exists with another
// non-register operand, unless that operand is an inlineable immediate.
if (Op.isFI())
return false;
}
} else if (IsInlineConst && ST.hasNoF16PseudoScalarTransInlineConstants() &&

View File

@ -1917,8 +1917,9 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
; GFX9-NEXT: s_mov_b32 s0, 0
; GFX9-NEXT: scratch_store_dword off, v0, s0 offset:4
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_movk_i32 s0, 0x3e80
; GFX9-NEXT: v_mov_b32_e32 v0, 15
; GFX9-NEXT: s_movk_i32 s0, 0x3e84
; GFX9-NEXT: s_add_i32 s0, s0, 4
; GFX9-NEXT: scratch_store_dword off, v0, s0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
@ -1933,7 +1934,8 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9
; GFX10-NEXT: v_mov_b32_e32 v0, 13
; GFX10-NEXT: v_mov_b32_e32 v1, 15
; GFX10-NEXT: s_movk_i32 s0, 0x3e84
; GFX10-NEXT: s_movk_i32 s0, 0x3e80
; GFX10-NEXT: s_add_i32 s0, s0, 4
; GFX10-NEXT: scratch_store_dword off, v0, off offset:4
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: scratch_store_dword off, v1, s0
@ -1945,10 +1947,11 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
; GFX942-LABEL: store_load_large_imm_offset_kernel:
; GFX942: ; %bb.0: ; %bb
; GFX942-NEXT: v_mov_b32_e32 v0, 13
; GFX942-NEXT: s_movk_i32 s0, 0x3e80
; GFX942-NEXT: scratch_store_dword off, v0, off offset:4 sc0 sc1
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: v_mov_b32_e32 v0, 15
; GFX942-NEXT: s_movk_i32 s0, 0x3e84
; GFX942-NEXT: s_add_i32 s0, s0, 4
; GFX942-NEXT: scratch_store_dword off, v0, s0 sc0 sc1
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
@ -1958,7 +1961,9 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
; GFX11-LABEL: store_load_large_imm_offset_kernel:
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
; GFX11-NEXT: s_movk_i32 s0, 0x3e84
; GFX11-NEXT: s_movk_i32 s0, 0x3e80
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_add_i32 s0, s0, 4
; GFX11-NEXT: scratch_store_b32 off, v0, off offset:4 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: scratch_store_b32 off, v1, s0 dlc
@ -1986,8 +1991,9 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
; UNALIGNED_GFX9-NEXT: s_mov_b32 s0, 0
; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s0 offset:4
; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
; UNALIGNED_GFX9-NEXT: s_movk_i32 s0, 0x3e80
; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v0, 15
; UNALIGNED_GFX9-NEXT: s_movk_i32 s0, 0x3e84
; UNALIGNED_GFX9-NEXT: s_add_i32 s0, s0, 4
; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s0
; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
; UNALIGNED_GFX9-NEXT: scratch_load_dword v0, off, s0 glc
@ -2002,7 +2008,8 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
; UNALIGNED_GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9
; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v0, 13
; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v1, 15
; UNALIGNED_GFX10-NEXT: s_movk_i32 s0, 0x3e84
; UNALIGNED_GFX10-NEXT: s_movk_i32 s0, 0x3e80
; UNALIGNED_GFX10-NEXT: s_add_i32 s0, s0, 4
; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v0, off offset:4
; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v1, s0
@ -2014,10 +2021,11 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
; UNALIGNED_GFX942-LABEL: store_load_large_imm_offset_kernel:
; UNALIGNED_GFX942: ; %bb.0: ; %bb
; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v0, 13
; UNALIGNED_GFX942-NEXT: s_movk_i32 s0, 0x3e80
; UNALIGNED_GFX942-NEXT: scratch_store_dword off, v0, off offset:4 sc0 sc1
; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0)
; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v0, 15
; UNALIGNED_GFX942-NEXT: s_movk_i32 s0, 0x3e84
; UNALIGNED_GFX942-NEXT: s_add_i32 s0, s0, 4
; UNALIGNED_GFX942-NEXT: scratch_store_dword off, v0, s0 sc0 sc1
; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0)
; UNALIGNED_GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
@ -2027,7 +2035,9 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
; UNALIGNED_GFX11-LABEL: store_load_large_imm_offset_kernel:
; UNALIGNED_GFX11: ; %bb.0: ; %bb
; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
; UNALIGNED_GFX11-NEXT: s_movk_i32 s0, 0x3e84
; UNALIGNED_GFX11-NEXT: s_movk_i32 s0, 0x3e80
; UNALIGNED_GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; UNALIGNED_GFX11-NEXT: s_add_i32 s0, s0, 4
; UNALIGNED_GFX11-NEXT: scratch_store_b32 off, v0, off offset:4 dlc
; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; UNALIGNED_GFX11-NEXT: scratch_store_b32 off, v1, s0 dlc
@ -2061,11 +2071,13 @@ define void @store_load_large_imm_offset_foo() {
; GFX9-LABEL: store_load_large_imm_offset_foo:
; GFX9: ; %bb.0: ; %bb
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_movk_i32 s0, 0x3e80
; GFX9-NEXT: v_mov_b32_e32 v0, 13
; GFX9-NEXT: s_add_i32 s1, s32, s0
; GFX9-NEXT: scratch_store_dword off, v0, s32 offset:4
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, 15
; GFX9-NEXT: s_add_i32 s0, s32, 0x3e84
; GFX9-NEXT: s_add_i32 s0, s1, 4
; GFX9-NEXT: scratch_store_dword off, v0, s0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
@ -2076,8 +2088,10 @@ define void @store_load_large_imm_offset_foo() {
; GFX10: ; %bb.0: ; %bb
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, 13
; GFX10-NEXT: s_movk_i32 s0, 0x3e80
; GFX10-NEXT: v_mov_b32_e32 v1, 15
; GFX10-NEXT: s_add_i32 s0, s32, 0x3e84
; GFX10-NEXT: s_add_i32 s1, s32, s0
; GFX10-NEXT: s_add_i32 s0, s1, 4
; GFX10-NEXT: scratch_store_dword off, v0, s32 offset:4
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: scratch_store_dword off, v1, s0
@ -2089,11 +2103,13 @@ define void @store_load_large_imm_offset_foo() {
; GFX942-LABEL: store_load_large_imm_offset_foo:
; GFX942: ; %bb.0: ; %bb
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: s_movk_i32 s0, 0x3e80
; GFX942-NEXT: v_mov_b32_e32 v0, 13
; GFX942-NEXT: s_add_i32 s1, s32, s0
; GFX942-NEXT: scratch_store_dword off, v0, s32 offset:4 sc0 sc1
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: v_mov_b32_e32 v0, 15
; GFX942-NEXT: s_add_i32 s0, s32, 0x3e84
; GFX942-NEXT: s_add_i32 s0, s1, 4
; GFX942-NEXT: scratch_store_dword off, v0, s0 sc0 sc1
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
@ -2104,7 +2120,10 @@ define void @store_load_large_imm_offset_foo() {
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
; GFX11-NEXT: s_add_i32 s0, s32, 0x3e84
; GFX11-NEXT: s_movk_i32 s0, 0x3e80
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: s_add_i32 s1, s32, s0
; GFX11-NEXT: s_add_i32 s0, s1, 4
; GFX11-NEXT: scratch_store_b32 off, v0, s32 offset:4 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: scratch_store_b32 off, v1, s0 dlc
@ -2133,11 +2152,13 @@ define void @store_load_large_imm_offset_foo() {
; UNALIGNED_GFX9-LABEL: store_load_large_imm_offset_foo:
; UNALIGNED_GFX9: ; %bb.0: ; %bb
; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; UNALIGNED_GFX9-NEXT: s_movk_i32 s0, 0x3e80
; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v0, 13
; UNALIGNED_GFX9-NEXT: s_add_i32 s1, s32, s0
; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s32 offset:4
; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v0, 15
; UNALIGNED_GFX9-NEXT: s_add_i32 s0, s32, 0x3e84
; UNALIGNED_GFX9-NEXT: s_add_i32 s0, s1, 4
; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s0
; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
; UNALIGNED_GFX9-NEXT: scratch_load_dword v0, off, s0 glc
@ -2148,8 +2169,10 @@ define void @store_load_large_imm_offset_foo() {
; UNALIGNED_GFX10: ; %bb.0: ; %bb
; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v0, 13
; UNALIGNED_GFX10-NEXT: s_movk_i32 s0, 0x3e80
; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v1, 15
; UNALIGNED_GFX10-NEXT: s_add_i32 s0, s32, 0x3e84
; UNALIGNED_GFX10-NEXT: s_add_i32 s1, s32, s0
; UNALIGNED_GFX10-NEXT: s_add_i32 s0, s1, 4
; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v0, s32 offset:4
; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v1, s0
@ -2161,11 +2184,13 @@ define void @store_load_large_imm_offset_foo() {
; UNALIGNED_GFX942-LABEL: store_load_large_imm_offset_foo:
; UNALIGNED_GFX942: ; %bb.0: ; %bb
; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; UNALIGNED_GFX942-NEXT: s_movk_i32 s0, 0x3e80
; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v0, 13
; UNALIGNED_GFX942-NEXT: s_add_i32 s1, s32, s0
; UNALIGNED_GFX942-NEXT: scratch_store_dword off, v0, s32 offset:4 sc0 sc1
; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0)
; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v0, 15
; UNALIGNED_GFX942-NEXT: s_add_i32 s0, s32, 0x3e84
; UNALIGNED_GFX942-NEXT: s_add_i32 s0, s1, 4
; UNALIGNED_GFX942-NEXT: scratch_store_dword off, v0, s0 sc0 sc1
; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0)
; UNALIGNED_GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
@ -2176,7 +2201,10 @@ define void @store_load_large_imm_offset_foo() {
; UNALIGNED_GFX11: ; %bb.0: ; %bb
; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
; UNALIGNED_GFX11-NEXT: s_add_i32 s0, s32, 0x3e84
; UNALIGNED_GFX11-NEXT: s_movk_i32 s0, 0x3e80
; UNALIGNED_GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; UNALIGNED_GFX11-NEXT: s_add_i32 s1, s32, s0
; UNALIGNED_GFX11-NEXT: s_add_i32 s0, s1, 4
; UNALIGNED_GFX11-NEXT: scratch_store_b32 off, v0, s32 offset:4 dlc
; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; UNALIGNED_GFX11-NEXT: scratch_store_b32 off, v1, s0 dlc

View File

@ -3621,7 +3621,8 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
; GFX9-NEXT: s_mov_b32 s0, 0
; GFX9-NEXT: scratch_store_dword off, v0, s0 offset:4
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_movk_i32 s0, 0x3004
; GFX9-NEXT: s_movk_i32 s0, 0x3000
; GFX9-NEXT: s_add_i32 s0, s0, 4
; GFX9-NEXT: v_mov_b32_e32 v0, 15
; GFX9-NEXT: scratch_store_dword off, v0, s0 offset:3712
; GFX9-NEXT: s_waitcnt vmcnt(0)
@ -3637,7 +3638,8 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9
; GFX10-NEXT: v_mov_b32_e32 v0, 13
; GFX10-NEXT: v_mov_b32_e32 v1, 15
; GFX10-NEXT: s_movk_i32 s0, 0x3804
; GFX10-NEXT: s_movk_i32 s0, 0x3800
; GFX10-NEXT: s_add_i32 s0, s0, 4
; GFX10-NEXT: scratch_store_dword off, v0, off offset:4
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: scratch_store_dword off, v1, s0 offset:1664
@ -3682,7 +3684,8 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
; GFX9-PAL-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
; GFX9-PAL-NEXT: scratch_store_dword off, v0, s0 offset:4
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: s_movk_i32 s0, 0x3004
; GFX9-PAL-NEXT: s_movk_i32 s0, 0x3000
; GFX9-PAL-NEXT: s_add_i32 s0, s0, 4
; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 15
; GFX9-PAL-NEXT: scratch_store_dword off, v0, s0 offset:3712
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
@ -3716,8 +3719,9 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
; GFX1010-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13
; GFX1010-PAL-NEXT: v_mov_b32_e32 v0, 13
; GFX1010-PAL-NEXT: v_mov_b32_e32 v1, 15
; GFX1010-PAL-NEXT: s_movk_i32 s0, 0x3800
; GFX1010-PAL-NEXT: s_mov_b32 s1, 0
; GFX1010-PAL-NEXT: s_movk_i32 s0, 0x3804
; GFX1010-PAL-NEXT: s_add_i32 s0, s0, 4
; GFX1010-PAL-NEXT: scratch_store_dword off, v0, s1 offset:4
; GFX1010-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1010-PAL-NEXT: scratch_store_dword off, v1, s0 offset:1664
@ -3739,7 +3743,8 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
; GFX1030-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13
; GFX1030-PAL-NEXT: v_mov_b32_e32 v0, 13
; GFX1030-PAL-NEXT: v_mov_b32_e32 v1, 15
; GFX1030-PAL-NEXT: s_movk_i32 s0, 0x3804
; GFX1030-PAL-NEXT: s_movk_i32 s0, 0x3800
; GFX1030-PAL-NEXT: s_add_i32 s0, s0, 4
; GFX1030-PAL-NEXT: scratch_store_dword off, v0, off offset:4
; GFX1030-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1030-PAL-NEXT: scratch_store_dword off, v1, s0 offset:1664
@ -3785,10 +3790,12 @@ define void @store_load_large_imm_offset_foo() {
; GFX9-LABEL: store_load_large_imm_offset_foo:
; GFX9: ; %bb.0: ; %bb
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_movk_i32 s0, 0x3000
; GFX9-NEXT: v_mov_b32_e32 v0, 13
; GFX9-NEXT: s_add_i32 s1, s32, s0
; GFX9-NEXT: scratch_store_dword off, v0, s32 offset:4
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_add_i32 s0, s32, 0x3004
; GFX9-NEXT: s_add_i32 s0, s1, 4
; GFX9-NEXT: v_mov_b32_e32 v0, 15
; GFX9-NEXT: scratch_store_dword off, v0, s0 offset:3712
; GFX9-NEXT: s_waitcnt vmcnt(0)
@ -3800,8 +3807,10 @@ define void @store_load_large_imm_offset_foo() {
; GFX10: ; %bb.0: ; %bb
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, 13
; GFX10-NEXT: s_movk_i32 s0, 0x3800
; GFX10-NEXT: v_mov_b32_e32 v1, 15
; GFX10-NEXT: s_add_i32 s0, s32, 0x3804
; GFX10-NEXT: s_add_i32 s1, s32, s0
; GFX10-NEXT: s_add_i32 s0, s1, 4
; GFX10-NEXT: scratch_store_dword off, v0, s32 offset:4
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: scratch_store_dword off, v1, s0 offset:1664
@ -3843,10 +3852,12 @@ define void @store_load_large_imm_offset_foo() {
; GFX9-PAL-LABEL: store_load_large_imm_offset_foo:
; GFX9-PAL: ; %bb.0: ; %bb
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-PAL-NEXT: s_movk_i32 s0, 0x3000
; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 13
; GFX9-PAL-NEXT: s_add_i32 s1, s32, s0
; GFX9-PAL-NEXT: scratch_store_dword off, v0, s32 offset:4
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: s_add_i32 s0, s32, 0x3004
; GFX9-PAL-NEXT: s_add_i32 s0, s1, 4
; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 15
; GFX9-PAL-NEXT: scratch_store_dword off, v0, s0 offset:3712
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
@ -3872,8 +3883,10 @@ define void @store_load_large_imm_offset_foo() {
; GFX10-PAL: ; %bb.0: ; %bb
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-PAL-NEXT: v_mov_b32_e32 v0, 13
; GFX10-PAL-NEXT: s_movk_i32 s0, 0x3800
; GFX10-PAL-NEXT: v_mov_b32_e32 v1, 15
; GFX10-PAL-NEXT: s_add_i32 s0, s32, 0x3804
; GFX10-PAL-NEXT: s_add_i32 s1, s32, s0
; GFX10-PAL-NEXT: s_add_i32 s0, s1, 4
; GFX10-PAL-NEXT: scratch_store_dword off, v0, s32 offset:4
; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-PAL-NEXT: scratch_store_dword off, v1, s0 offset:1664

View File

@ -75,7 +75,8 @@ stack:
body: |
bb.0:
; CHECK-LABEL: name: fold_frame_index__s_add_i32__fi_materializedconst_0
; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 %stack.0, 256, implicit-def $scc
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 256
; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 %stack.0, [[S_MOV_B32_]], implicit-def $scc
; CHECK-NEXT: $sgpr4 = COPY [[S_ADD_I32_]]
; CHECK-NEXT: SI_RETURN implicit $sgpr4
%0:sreg_32 = S_MOV_B32 %stack.0

View File

@ -46,7 +46,8 @@ body: |
%2:sreg_32 = S_LSHL2_ADD_U32 %0, %1, implicit-def $scc
...
# GCN-LABEL: name: test_frameindex{{$}}
# GCN: %1:sreg_32 = S_ADD_I32 %stack.0, 70
# GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 70
# GCN-NEXT: %1:sreg_32 = S_ADD_I32 %stack.0, [[S_MOV_B32_]]
---
name: test_frameindex
tracksRegLiveness: true

View File

@ -360,7 +360,8 @@ entry:
; s_add_i32.
; GCN-LABEL: {{^}}fi_sop2_s_add_u32_literal_error:
; GCN: s_add_u32 [[ADD_LO:s[0-9]+]], 0, 0x2010
; GCN: s_movk_i32 [[S_MOVK_I32_:s[0-9]+]], 0x1000
; GCN: s_add_u32 [[ADD_LO:s[0-9]+]], 0x1010, [[S_MOVK_I32_]]
; GCN: s_addc_u32 [[ADD_HI:s[0-9]+]], s{{[0-9]+}}, 0
define amdgpu_kernel void @fi_sop2_s_add_u32_literal_error() #0 {
entry:

View File

@ -6,16 +6,24 @@ define amdgpu_gfx [13 x i32] @issue130120() {
; CHECK: ; %bb.0: ; %bb
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: s_add_i32 s0, s32, 0xf0
; CHECK-NEXT: s_add_i32 s1, s32, 0xf4
; CHECK-NEXT: s_add_i32 s2, s32, 0xf8
; CHECK-NEXT: s_add_i32 s3, s32, 0xfc
; CHECK-NEXT: s_movk_i32 s1, 0xf4
; CHECK-NEXT: s_movk_i32 s2, 0xf8
; CHECK-NEXT: s_movk_i32 s3, 0xfc
; CHECK-NEXT: s_movk_i32 s34, 0x100
; CHECK-NEXT: v_mov_b32_e32 v1, v0
; CHECK-NEXT: s_add_i32 s34, s32, 0x100
; CHECK-NEXT: s_add_i32 s35, s32, 0x104
; CHECK-NEXT: s_add_i32 s36, s32, 0x108
; CHECK-NEXT: s_add_i32 s37, s32, 0x110
; CHECK-NEXT: s_add_i32 s38, s32, 0x120
; CHECK-NEXT: s_movk_i32 s35, 0x104
; CHECK-NEXT: s_movk_i32 s36, 0x108
; CHECK-NEXT: s_movk_i32 s37, 0x110
; CHECK-NEXT: s_movk_i32 s38, 0x120
; CHECK-NEXT: s_add_i32 s0, s32, 0xf0
; CHECK-NEXT: s_add_i32 s1, s32, s1
; CHECK-NEXT: s_add_i32 s2, s32, s2
; CHECK-NEXT: s_add_i32 s3, s32, s3
; CHECK-NEXT: s_add_i32 s34, s32, s34
; CHECK-NEXT: s_add_i32 s35, s32, s35
; CHECK-NEXT: s_add_i32 s36, s32, s36
; CHECK-NEXT: s_add_i32 s37, s32, s37
; CHECK-NEXT: s_add_i32 s38, s32, s38
; CHECK-NEXT: s_or_b32 s39, s32, 4
; CHECK-NEXT: s_or_b32 s40, s32, 8
; CHECK-NEXT: s_or_b32 s41, s32, 12

View File

@ -74,7 +74,8 @@ define amdgpu_kernel void @local_stack_offset_uses_sp(ptr addrspace(1) %out) {
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: s_cbranch_scc1 .LBB0_1
; FLATSCR-NEXT: ; %bb.2: ; %split
; FLATSCR-NEXT: s_movk_i32 s0, 0x5000
; FLATSCR-NEXT: s_movk_i32 s0, 0x2000
; FLATSCR-NEXT: s_addk_i32 s0, 0x3000
; FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, s0 offset:208 glc
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: s_movk_i32 s0, 0x3000
@ -175,7 +176,9 @@ define void @func_local_stack_offset_uses_sp(ptr addrspace(1) %out) {
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: s_cbranch_scc1 .LBB1_1
; FLATSCR-NEXT: ; %bb.2: ; %split
; FLATSCR-NEXT: s_add_i32 s0, s33, 0x5000
; FLATSCR-NEXT: s_movk_i32 s0, 0x2000
; FLATSCR-NEXT: s_add_i32 s1, s33, s0
; FLATSCR-NEXT: s_add_i32 s0, s1, 0x3000
; FLATSCR-NEXT: scratch_load_dwordx2 v[2:3], off, s0 offset:208 glc
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: s_add_i32 s0, s33, 0x3000
@ -223,30 +226,35 @@ define amdgpu_kernel void @local_stack_offset_uses_sp_flat(ptr addrspace(1) %out
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: s_cbranch_scc1 .LBB2_1
; MUBUF-NEXT: ; %bb.2: ; %split
; MUBUF-NEXT: s_movk_i32 s5, 0x12d4
; MUBUF-NEXT: v_mov_b32_e32 v1, 0x4000
; MUBUF-NEXT: v_or_b32_e32 v0, 0x12d4, v1
; MUBUF-NEXT: v_or_b32_e32 v0, s5, v1
; MUBUF-NEXT: s_movk_i32 s5, 0x12d0
; MUBUF-NEXT: v_mov_b32_e32 v1, 0x4000
; MUBUF-NEXT: s_movk_i32 s4, 0x4000
; MUBUF-NEXT: buffer_load_dword v5, v0, s[0:3], 0 offen glc
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: v_or_b32_e32 v0, 0x12d0, v1
; MUBUF-NEXT: v_or_b32_e32 v0, s5, v1
; MUBUF-NEXT: s_movk_i32 s5, 0x12c4
; MUBUF-NEXT: v_mov_b32_e32 v1, 0x4000
; MUBUF-NEXT: s_or_b32 s4, s4, 0x12c0
; MUBUF-NEXT: buffer_load_dword v4, v0, s[0:3], 0 offen glc
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: v_or_b32_e32 v0, 0x12c4, v1
; MUBUF-NEXT: v_mov_b32_e32 v3, 0x4000
; MUBUF-NEXT: v_or_b32_e32 v0, s5, v1
; MUBUF-NEXT: buffer_load_dword v1, v0, s[0:3], 0 offen glc
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: v_mov_b32_e32 v0, s4
; MUBUF-NEXT: v_or_b32_e32 v2, 0x12cc, v3
; MUBUF-NEXT: s_movk_i32 s4, 0x12cc
; MUBUF-NEXT: v_mov_b32_e32 v3, 0x4000
; MUBUF-NEXT: v_or_b32_e32 v2, s4, v3
; MUBUF-NEXT: s_movk_i32 s4, 0x12c8
; MUBUF-NEXT: v_mov_b32_e32 v6, 0x4000
; MUBUF-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen glc
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: v_mov_b32_e32 v7, 0x4000
; MUBUF-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen glc
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: v_or_b32_e32 v2, 0x12c8, v6
; MUBUF-NEXT: v_or_b32_e32 v2, s4, v6
; MUBUF-NEXT: v_mov_b32_e32 v8, 0x4000
; MUBUF-NEXT: v_mov_b32_e32 v9, 0x4000
; MUBUF-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen glc
@ -298,7 +306,8 @@ define amdgpu_kernel void @local_stack_offset_uses_sp_flat(ptr addrspace(1) %out
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: s_cbranch_scc1 .LBB2_1
; FLATSCR-NEXT: ; %bb.2: ; %split
; FLATSCR-NEXT: s_movk_i32 s0, 0x3000
; FLATSCR-NEXT: s_movk_i32 s0, 0x1000
; FLATSCR-NEXT: s_addk_i32 s0, 0x2000
; FLATSCR-NEXT: scratch_load_dwordx2 v[8:9], off, s0 offset:720 glc
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s0 offset:704 glc

View File

@ -0,0 +1,108 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s
define protected amdgpu_kernel void @no_folding_imm_to_inst_with_fi(<4 x i64> %val4, <16 x i64> %val16) {
; CHECK-LABEL: no_folding_imm_to_inst_with_fi:
; CHECK: ; %bb.0: ; %bb
; CHECK-NEXT: s_clause 0x2
; CHECK-NEXT: s_load_b256 s[36:43], s[4:5], 0x24
; CHECK-NEXT: s_load_b512 s[16:31], s[4:5], 0xe4
; CHECK-NEXT: s_load_b512 s[0:15], s[4:5], 0xa4
; CHECK-NEXT: s_mov_b64 s[34:35], src_private_base
; CHECK-NEXT: s_movk_i32 s33, 0x70
; CHECK-NEXT: s_movk_i32 s34, 0x60
; CHECK-NEXT: s_or_b32 s44, 0x80, s33
; CHECK-NEXT: s_mov_b32 s45, s35
; CHECK-NEXT: s_or_b32 s46, 0x80, s34
; CHECK-NEXT: s_mov_b32 s47, s35
; CHECK-NEXT: v_dual_mov_b32 v20, s44 :: v_dual_mov_b32 v21, s45
; CHECK-NEXT: v_dual_mov_b32 v22, s46 :: v_dual_mov_b32 v23, s47
; CHECK-NEXT: s_movk_i32 s34, 0x80
; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; CHECK-NEXT: v_dual_mov_b32 v34, s34 :: v_dual_mov_b32 v35, s35
; CHECK-NEXT: s_wait_kmcnt 0x0
; CHECK-NEXT: v_dual_mov_b32 v0, s40 :: v_dual_mov_b32 v1, s41
; CHECK-NEXT: v_dual_mov_b32 v2, s42 :: v_dual_mov_b32 v3, s43
; CHECK-NEXT: v_dual_mov_b32 v4, s36 :: v_dual_mov_b32 v5, s37
; CHECK-NEXT: v_dual_mov_b32 v6, s38 :: v_dual_mov_b32 v7, s39
; CHECK-NEXT: scratch_store_b128 off, v[0:3], off offset:16 scope:SCOPE_SYS
; CHECK-NEXT: s_wait_storecnt 0x0
; CHECK-NEXT: v_dual_mov_b32 v0, s20 :: v_dual_mov_b32 v1, s21
; CHECK-NEXT: s_movk_i32 s20, 0x50
; CHECK-NEXT: v_dual_mov_b32 v8, s28 :: v_dual_mov_b32 v9, s29
; CHECK-NEXT: v_dual_mov_b32 v10, s30 :: v_dual_mov_b32 v11, s31
; CHECK-NEXT: s_wait_alu 0xfffe
; CHECK-NEXT: s_or_b32 s20, 0x80, s20
; CHECK-NEXT: s_mov_b32 s21, s35
; CHECK-NEXT: v_dual_mov_b32 v12, s24 :: v_dual_mov_b32 v13, s25
; CHECK-NEXT: v_dual_mov_b32 v14, s26 :: v_dual_mov_b32 v15, s27
; CHECK-NEXT: v_dual_mov_b32 v2, s22 :: v_dual_mov_b32 v3, s23
; CHECK-NEXT: s_wait_alu 0xfffe
; CHECK-NEXT: v_dual_mov_b32 v25, s21 :: v_dual_mov_b32 v24, s20
; CHECK-NEXT: scratch_store_b128 off, v[4:7], off scope:SCOPE_SYS
; CHECK-NEXT: s_wait_storecnt 0x0
; CHECK-NEXT: flat_store_b128 v[20:21], v[8:11] scope:SCOPE_SYS
; CHECK-NEXT: s_wait_storecnt 0x0
; CHECK-NEXT: flat_store_b128 v[22:23], v[12:15] scope:SCOPE_SYS
; CHECK-NEXT: s_wait_storecnt 0x0
; CHECK-NEXT: flat_store_b128 v[24:25], v[0:3] scope:SCOPE_SYS
; CHECK-NEXT: s_wait_storecnt 0x0
; CHECK-NEXT: v_dual_mov_b32 v0, s16 :: v_dual_mov_b32 v1, s17
; CHECK-NEXT: s_or_b32 s16, 0x80, 64
; CHECK-NEXT: s_mov_b32 s17, s35
; CHECK-NEXT: v_dual_mov_b32 v4, s12 :: v_dual_mov_b32 v5, s13
; CHECK-NEXT: s_or_b32 s12, 0x80, 48
; CHECK-NEXT: s_mov_b32 s13, s35
; CHECK-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9
; CHECK-NEXT: s_or_b32 s8, 0x80, 32
; CHECK-NEXT: s_mov_b32 s9, s35
; CHECK-NEXT: v_dual_mov_b32 v12, s4 :: v_dual_mov_b32 v13, s5
; CHECK-NEXT: s_or_b32 s4, 0x80, 16
; CHECK-NEXT: s_mov_b32 s5, s35
; CHECK-NEXT: v_dual_mov_b32 v2, s18 :: v_dual_mov_b32 v3, s19
; CHECK-NEXT: s_wait_alu 0xfffe
; CHECK-NEXT: v_dual_mov_b32 v27, s17 :: v_dual_mov_b32 v26, s16
; CHECK-NEXT: v_dual_mov_b32 v6, s14 :: v_dual_mov_b32 v7, s15
; CHECK-NEXT: v_dual_mov_b32 v29, s13 :: v_dual_mov_b32 v28, s12
; CHECK-NEXT: v_dual_mov_b32 v31, s9 :: v_dual_mov_b32 v30, s8
; CHECK-NEXT: v_dual_mov_b32 v33, s5 :: v_dual_mov_b32 v32, s4
; CHECK-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11
; CHECK-NEXT: v_dual_mov_b32 v14, s6 :: v_dual_mov_b32 v15, s7
; CHECK-NEXT: v_dual_mov_b32 v16, s0 :: v_dual_mov_b32 v17, s1
; CHECK-NEXT: v_dual_mov_b32 v18, s2 :: v_dual_mov_b32 v19, s3
; CHECK-NEXT: flat_store_b128 v[26:27], v[0:3] scope:SCOPE_SYS
; CHECK-NEXT: s_wait_storecnt 0x0
; CHECK-NEXT: flat_store_b128 v[28:29], v[4:7] scope:SCOPE_SYS
; CHECK-NEXT: s_wait_storecnt 0x0
; CHECK-NEXT: flat_store_b128 v[30:31], v[8:11] scope:SCOPE_SYS
; CHECK-NEXT: s_wait_storecnt 0x0
; CHECK-NEXT: flat_store_b128 v[32:33], v[12:15] scope:SCOPE_SYS
; CHECK-NEXT: s_wait_storecnt 0x0
; CHECK-NEXT: flat_store_b128 v[34:35], v[16:19] scope:SCOPE_SYS
; CHECK-NEXT: s_wait_storecnt 0x0
; CHECK-NEXT: flat_load_b128 v[0:3], v[22:23] scope:SCOPE_SYS
; CHECK-NEXT: s_wait_loadcnt_dscnt 0x0
; CHECK-NEXT: flat_load_b128 v[0:3], v[20:21] scope:SCOPE_SYS
; CHECK-NEXT: s_wait_loadcnt_dscnt 0x0
; CHECK-NEXT: flat_load_b128 v[0:3], v[26:27] scope:SCOPE_SYS
; CHECK-NEXT: s_wait_loadcnt_dscnt 0x0
; CHECK-NEXT: flat_load_b128 v[0:3], v[24:25] scope:SCOPE_SYS
; CHECK-NEXT: s_wait_loadcnt_dscnt 0x0
; CHECK-NEXT: flat_load_b128 v[0:3], v[30:31] scope:SCOPE_SYS
; CHECK-NEXT: s_wait_loadcnt_dscnt 0x0
; CHECK-NEXT: flat_load_b128 v[0:3], v[28:29] scope:SCOPE_SYS
; CHECK-NEXT: s_wait_loadcnt_dscnt 0x0
; CHECK-NEXT: flat_load_b128 v[0:3], v[34:35] scope:SCOPE_SYS
; CHECK-NEXT: s_wait_loadcnt_dscnt 0x0
; CHECK-NEXT: flat_load_b128 v[0:3], v[32:33] scope:SCOPE_SYS
; CHECK-NEXT: s_wait_loadcnt 0x0
; CHECK-NEXT: s_endpgm
bb:
%alloca = alloca <4 x i64>, align 32, addrspace(5)
%alloca1 = alloca <16 x i64>, align 128, addrspace(5)
store volatile <4 x i64> %val4, ptr addrspace(5) %alloca
%ascast = addrspacecast ptr addrspace(5) %alloca1 to ptr
store volatile <16 x i64> %val16, ptr %ascast
%load = load volatile <16 x i64>, ptr %ascast
ret void
}