[AMDGPU] Do not fold an immediate into instructions with frame indexes (#151263)
Do not fold an immediate into an instruction that already has a frame index operand, because the frame index may itself later turn out to be another immediate. Fixes: SWDEV-536263. Co-authored-by: Matt Arsenault <arsenm2@gmail.com>
This commit is contained in:
parent
35bd40d321
commit
32161e9de3
@ -6122,10 +6122,11 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
|
||||
!Op.isIdenticalTo(*MO))
|
||||
return false;
|
||||
|
||||
// Do not fold a frame index into an instruction that already has a frame
|
||||
// index. The frame index handling code doesn't handle fixing up operand
|
||||
// constraints if there are multiple indexes.
|
||||
if (Op.isFI() && MO->isFI())
|
||||
// Do not fold a non-inlineable and non-register operand into an
|
||||
// instruction that already has a frame index. The frame index handling
|
||||
// code could not handle well when a frame index co-exists with another
|
||||
// non-register operand, unless that operand is an inlineable immediate.
|
||||
if (Op.isFI())
|
||||
return false;
|
||||
}
|
||||
} else if (IsInlineConst && ST.hasNoF16PseudoScalarTransInlineConstants() &&
|
||||
|
@ -1917,8 +1917,9 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
|
||||
; GFX9-NEXT: s_mov_b32 s0, 0
|
||||
; GFX9-NEXT: scratch_store_dword off, v0, s0 offset:4
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_movk_i32 s0, 0x3e80
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, 15
|
||||
; GFX9-NEXT: s_movk_i32 s0, 0x3e84
|
||||
; GFX9-NEXT: s_add_i32 s0, s0, 4
|
||||
; GFX9-NEXT: scratch_store_dword off, v0, s0
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
|
||||
@ -1933,7 +1934,8 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
|
||||
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9
|
||||
; GFX10-NEXT: v_mov_b32_e32 v0, 13
|
||||
; GFX10-NEXT: v_mov_b32_e32 v1, 15
|
||||
; GFX10-NEXT: s_movk_i32 s0, 0x3e84
|
||||
; GFX10-NEXT: s_movk_i32 s0, 0x3e80
|
||||
; GFX10-NEXT: s_add_i32 s0, s0, 4
|
||||
; GFX10-NEXT: scratch_store_dword off, v0, off offset:4
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-NEXT: scratch_store_dword off, v1, s0
|
||||
@ -1945,10 +1947,11 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
|
||||
; GFX942-LABEL: store_load_large_imm_offset_kernel:
|
||||
; GFX942: ; %bb.0: ; %bb
|
||||
; GFX942-NEXT: v_mov_b32_e32 v0, 13
|
||||
; GFX942-NEXT: s_movk_i32 s0, 0x3e80
|
||||
; GFX942-NEXT: scratch_store_dword off, v0, off offset:4 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b32_e32 v0, 15
|
||||
; GFX942-NEXT: s_movk_i32 s0, 0x3e84
|
||||
; GFX942-NEXT: s_add_i32 s0, s0, 4
|
||||
; GFX942-NEXT: scratch_store_dword off, v0, s0 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
|
||||
@ -1958,7 +1961,9 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
|
||||
; GFX11-LABEL: store_load_large_imm_offset_kernel:
|
||||
; GFX11: ; %bb.0: ; %bb
|
||||
; GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
|
||||
; GFX11-NEXT: s_movk_i32 s0, 0x3e84
|
||||
; GFX11-NEXT: s_movk_i32 s0, 0x3e80
|
||||
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
||||
; GFX11-NEXT: s_add_i32 s0, s0, 4
|
||||
; GFX11-NEXT: scratch_store_b32 off, v0, off offset:4 dlc
|
||||
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX11-NEXT: scratch_store_b32 off, v1, s0 dlc
|
||||
@ -1986,8 +1991,9 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
|
||||
; UNALIGNED_GFX9-NEXT: s_mov_b32 s0, 0
|
||||
; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s0 offset:4
|
||||
; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; UNALIGNED_GFX9-NEXT: s_movk_i32 s0, 0x3e80
|
||||
; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v0, 15
|
||||
; UNALIGNED_GFX9-NEXT: s_movk_i32 s0, 0x3e84
|
||||
; UNALIGNED_GFX9-NEXT: s_add_i32 s0, s0, 4
|
||||
; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s0
|
||||
; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; UNALIGNED_GFX9-NEXT: scratch_load_dword v0, off, s0 glc
|
||||
@ -2002,7 +2008,8 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
|
||||
; UNALIGNED_GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9
|
||||
; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v0, 13
|
||||
; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v1, 15
|
||||
; UNALIGNED_GFX10-NEXT: s_movk_i32 s0, 0x3e84
|
||||
; UNALIGNED_GFX10-NEXT: s_movk_i32 s0, 0x3e80
|
||||
; UNALIGNED_GFX10-NEXT: s_add_i32 s0, s0, 4
|
||||
; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v0, off offset:4
|
||||
; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v1, s0
|
||||
@ -2014,10 +2021,11 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
|
||||
; UNALIGNED_GFX942-LABEL: store_load_large_imm_offset_kernel:
|
||||
; UNALIGNED_GFX942: ; %bb.0: ; %bb
|
||||
; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v0, 13
|
||||
; UNALIGNED_GFX942-NEXT: s_movk_i32 s0, 0x3e80
|
||||
; UNALIGNED_GFX942-NEXT: scratch_store_dword off, v0, off offset:4 sc0 sc1
|
||||
; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v0, 15
|
||||
; UNALIGNED_GFX942-NEXT: s_movk_i32 s0, 0x3e84
|
||||
; UNALIGNED_GFX942-NEXT: s_add_i32 s0, s0, 4
|
||||
; UNALIGNED_GFX942-NEXT: scratch_store_dword off, v0, s0 sc0 sc1
|
||||
; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; UNALIGNED_GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
|
||||
@ -2027,7 +2035,9 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
|
||||
; UNALIGNED_GFX11-LABEL: store_load_large_imm_offset_kernel:
|
||||
; UNALIGNED_GFX11: ; %bb.0: ; %bb
|
||||
; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
|
||||
; UNALIGNED_GFX11-NEXT: s_movk_i32 s0, 0x3e84
|
||||
; UNALIGNED_GFX11-NEXT: s_movk_i32 s0, 0x3e80
|
||||
; UNALIGNED_GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
||||
; UNALIGNED_GFX11-NEXT: s_add_i32 s0, s0, 4
|
||||
; UNALIGNED_GFX11-NEXT: scratch_store_b32 off, v0, off offset:4 dlc
|
||||
; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; UNALIGNED_GFX11-NEXT: scratch_store_b32 off, v1, s0 dlc
|
||||
@ -2061,11 +2071,13 @@ define void @store_load_large_imm_offset_foo() {
|
||||
; GFX9-LABEL: store_load_large_imm_offset_foo:
|
||||
; GFX9: ; %bb.0: ; %bb
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: s_movk_i32 s0, 0x3e80
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, 13
|
||||
; GFX9-NEXT: s_add_i32 s1, s32, s0
|
||||
; GFX9-NEXT: scratch_store_dword off, v0, s32 offset:4
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, 15
|
||||
; GFX9-NEXT: s_add_i32 s0, s32, 0x3e84
|
||||
; GFX9-NEXT: s_add_i32 s0, s1, 4
|
||||
; GFX9-NEXT: scratch_store_dword off, v0, s0
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
|
||||
@ -2076,8 +2088,10 @@ define void @store_load_large_imm_offset_foo() {
|
||||
; GFX10: ; %bb.0: ; %bb
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-NEXT: v_mov_b32_e32 v0, 13
|
||||
; GFX10-NEXT: s_movk_i32 s0, 0x3e80
|
||||
; GFX10-NEXT: v_mov_b32_e32 v1, 15
|
||||
; GFX10-NEXT: s_add_i32 s0, s32, 0x3e84
|
||||
; GFX10-NEXT: s_add_i32 s1, s32, s0
|
||||
; GFX10-NEXT: s_add_i32 s0, s1, 4
|
||||
; GFX10-NEXT: scratch_store_dword off, v0, s32 offset:4
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-NEXT: scratch_store_dword off, v1, s0
|
||||
@ -2089,11 +2103,13 @@ define void @store_load_large_imm_offset_foo() {
|
||||
; GFX942-LABEL: store_load_large_imm_offset_foo:
|
||||
; GFX942: ; %bb.0: ; %bb
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX942-NEXT: s_movk_i32 s0, 0x3e80
|
||||
; GFX942-NEXT: v_mov_b32_e32 v0, 13
|
||||
; GFX942-NEXT: s_add_i32 s1, s32, s0
|
||||
; GFX942-NEXT: scratch_store_dword off, v0, s32 offset:4 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: v_mov_b32_e32 v0, 15
|
||||
; GFX942-NEXT: s_add_i32 s0, s32, 0x3e84
|
||||
; GFX942-NEXT: s_add_i32 s0, s1, 4
|
||||
; GFX942-NEXT: scratch_store_dword off, v0, s0 sc0 sc1
|
||||
; GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
|
||||
@ -2104,7 +2120,10 @@ define void @store_load_large_imm_offset_foo() {
|
||||
; GFX11: ; %bb.0: ; %bb
|
||||
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
|
||||
; GFX11-NEXT: s_add_i32 s0, s32, 0x3e84
|
||||
; GFX11-NEXT: s_movk_i32 s0, 0x3e80
|
||||
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
||||
; GFX11-NEXT: s_add_i32 s1, s32, s0
|
||||
; GFX11-NEXT: s_add_i32 s0, s1, 4
|
||||
; GFX11-NEXT: scratch_store_b32 off, v0, s32 offset:4 dlc
|
||||
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX11-NEXT: scratch_store_b32 off, v1, s0 dlc
|
||||
@ -2133,11 +2152,13 @@ define void @store_load_large_imm_offset_foo() {
|
||||
; UNALIGNED_GFX9-LABEL: store_load_large_imm_offset_foo:
|
||||
; UNALIGNED_GFX9: ; %bb.0: ; %bb
|
||||
; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; UNALIGNED_GFX9-NEXT: s_movk_i32 s0, 0x3e80
|
||||
; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v0, 13
|
||||
; UNALIGNED_GFX9-NEXT: s_add_i32 s1, s32, s0
|
||||
; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s32 offset:4
|
||||
; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v0, 15
|
||||
; UNALIGNED_GFX9-NEXT: s_add_i32 s0, s32, 0x3e84
|
||||
; UNALIGNED_GFX9-NEXT: s_add_i32 s0, s1, 4
|
||||
; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s0
|
||||
; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; UNALIGNED_GFX9-NEXT: scratch_load_dword v0, off, s0 glc
|
||||
@ -2148,8 +2169,10 @@ define void @store_load_large_imm_offset_foo() {
|
||||
; UNALIGNED_GFX10: ; %bb.0: ; %bb
|
||||
; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v0, 13
|
||||
; UNALIGNED_GFX10-NEXT: s_movk_i32 s0, 0x3e80
|
||||
; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v1, 15
|
||||
; UNALIGNED_GFX10-NEXT: s_add_i32 s0, s32, 0x3e84
|
||||
; UNALIGNED_GFX10-NEXT: s_add_i32 s1, s32, s0
|
||||
; UNALIGNED_GFX10-NEXT: s_add_i32 s0, s1, 4
|
||||
; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v0, s32 offset:4
|
||||
; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v1, s0
|
||||
@ -2161,11 +2184,13 @@ define void @store_load_large_imm_offset_foo() {
|
||||
; UNALIGNED_GFX942-LABEL: store_load_large_imm_offset_foo:
|
||||
; UNALIGNED_GFX942: ; %bb.0: ; %bb
|
||||
; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; UNALIGNED_GFX942-NEXT: s_movk_i32 s0, 0x3e80
|
||||
; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v0, 13
|
||||
; UNALIGNED_GFX942-NEXT: s_add_i32 s1, s32, s0
|
||||
; UNALIGNED_GFX942-NEXT: scratch_store_dword off, v0, s32 offset:4 sc0 sc1
|
||||
; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v0, 15
|
||||
; UNALIGNED_GFX942-NEXT: s_add_i32 s0, s32, 0x3e84
|
||||
; UNALIGNED_GFX942-NEXT: s_add_i32 s0, s1, 4
|
||||
; UNALIGNED_GFX942-NEXT: scratch_store_dword off, v0, s0 sc0 sc1
|
||||
; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0)
|
||||
; UNALIGNED_GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
|
||||
@ -2176,7 +2201,10 @@ define void @store_load_large_imm_offset_foo() {
|
||||
; UNALIGNED_GFX11: ; %bb.0: ; %bb
|
||||
; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
|
||||
; UNALIGNED_GFX11-NEXT: s_add_i32 s0, s32, 0x3e84
|
||||
; UNALIGNED_GFX11-NEXT: s_movk_i32 s0, 0x3e80
|
||||
; UNALIGNED_GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
||||
; UNALIGNED_GFX11-NEXT: s_add_i32 s1, s32, s0
|
||||
; UNALIGNED_GFX11-NEXT: s_add_i32 s0, s1, 4
|
||||
; UNALIGNED_GFX11-NEXT: scratch_store_b32 off, v0, s32 offset:4 dlc
|
||||
; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; UNALIGNED_GFX11-NEXT: scratch_store_b32 off, v1, s0 dlc
|
||||
|
@ -3621,7 +3621,8 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
|
||||
; GFX9-NEXT: s_mov_b32 s0, 0
|
||||
; GFX9-NEXT: scratch_store_dword off, v0, s0 offset:4
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_movk_i32 s0, 0x3004
|
||||
; GFX9-NEXT: s_movk_i32 s0, 0x3000
|
||||
; GFX9-NEXT: s_add_i32 s0, s0, 4
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, 15
|
||||
; GFX9-NEXT: scratch_store_dword off, v0, s0 offset:3712
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
@ -3637,7 +3638,8 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
|
||||
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9
|
||||
; GFX10-NEXT: v_mov_b32_e32 v0, 13
|
||||
; GFX10-NEXT: v_mov_b32_e32 v1, 15
|
||||
; GFX10-NEXT: s_movk_i32 s0, 0x3804
|
||||
; GFX10-NEXT: s_movk_i32 s0, 0x3800
|
||||
; GFX10-NEXT: s_add_i32 s0, s0, 4
|
||||
; GFX10-NEXT: scratch_store_dword off, v0, off offset:4
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-NEXT: scratch_store_dword off, v1, s0 offset:1664
|
||||
@ -3682,7 +3684,8 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
|
||||
; GFX9-PAL-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
|
||||
; GFX9-PAL-NEXT: scratch_store_dword off, v0, s0 offset:4
|
||||
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-PAL-NEXT: s_movk_i32 s0, 0x3004
|
||||
; GFX9-PAL-NEXT: s_movk_i32 s0, 0x3000
|
||||
; GFX9-PAL-NEXT: s_add_i32 s0, s0, 4
|
||||
; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 15
|
||||
; GFX9-PAL-NEXT: scratch_store_dword off, v0, s0 offset:3712
|
||||
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
|
||||
@ -3716,8 +3719,9 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
|
||||
; GFX1010-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13
|
||||
; GFX1010-PAL-NEXT: v_mov_b32_e32 v0, 13
|
||||
; GFX1010-PAL-NEXT: v_mov_b32_e32 v1, 15
|
||||
; GFX1010-PAL-NEXT: s_movk_i32 s0, 0x3800
|
||||
; GFX1010-PAL-NEXT: s_mov_b32 s1, 0
|
||||
; GFX1010-PAL-NEXT: s_movk_i32 s0, 0x3804
|
||||
; GFX1010-PAL-NEXT: s_add_i32 s0, s0, 4
|
||||
; GFX1010-PAL-NEXT: scratch_store_dword off, v0, s1 offset:4
|
||||
; GFX1010-PAL-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1010-PAL-NEXT: scratch_store_dword off, v1, s0 offset:1664
|
||||
@ -3739,7 +3743,8 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
|
||||
; GFX1030-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13
|
||||
; GFX1030-PAL-NEXT: v_mov_b32_e32 v0, 13
|
||||
; GFX1030-PAL-NEXT: v_mov_b32_e32 v1, 15
|
||||
; GFX1030-PAL-NEXT: s_movk_i32 s0, 0x3804
|
||||
; GFX1030-PAL-NEXT: s_movk_i32 s0, 0x3800
|
||||
; GFX1030-PAL-NEXT: s_add_i32 s0, s0, 4
|
||||
; GFX1030-PAL-NEXT: scratch_store_dword off, v0, off offset:4
|
||||
; GFX1030-PAL-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1030-PAL-NEXT: scratch_store_dword off, v1, s0 offset:1664
|
||||
@ -3785,10 +3790,12 @@ define void @store_load_large_imm_offset_foo() {
|
||||
; GFX9-LABEL: store_load_large_imm_offset_foo:
|
||||
; GFX9: ; %bb.0: ; %bb
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: s_movk_i32 s0, 0x3000
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, 13
|
||||
; GFX9-NEXT: s_add_i32 s1, s32, s0
|
||||
; GFX9-NEXT: scratch_store_dword off, v0, s32 offset:4
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_add_i32 s0, s32, 0x3004
|
||||
; GFX9-NEXT: s_add_i32 s0, s1, 4
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, 15
|
||||
; GFX9-NEXT: scratch_store_dword off, v0, s0 offset:3712
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
@ -3800,8 +3807,10 @@ define void @store_load_large_imm_offset_foo() {
|
||||
; GFX10: ; %bb.0: ; %bb
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-NEXT: v_mov_b32_e32 v0, 13
|
||||
; GFX10-NEXT: s_movk_i32 s0, 0x3800
|
||||
; GFX10-NEXT: v_mov_b32_e32 v1, 15
|
||||
; GFX10-NEXT: s_add_i32 s0, s32, 0x3804
|
||||
; GFX10-NEXT: s_add_i32 s1, s32, s0
|
||||
; GFX10-NEXT: s_add_i32 s0, s1, 4
|
||||
; GFX10-NEXT: scratch_store_dword off, v0, s32 offset:4
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-NEXT: scratch_store_dword off, v1, s0 offset:1664
|
||||
@ -3843,10 +3852,12 @@ define void @store_load_large_imm_offset_foo() {
|
||||
; GFX9-PAL-LABEL: store_load_large_imm_offset_foo:
|
||||
; GFX9-PAL: ; %bb.0: ; %bb
|
||||
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-PAL-NEXT: s_movk_i32 s0, 0x3000
|
||||
; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 13
|
||||
; GFX9-PAL-NEXT: s_add_i32 s1, s32, s0
|
||||
; GFX9-PAL-NEXT: scratch_store_dword off, v0, s32 offset:4
|
||||
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-PAL-NEXT: s_add_i32 s0, s32, 0x3004
|
||||
; GFX9-PAL-NEXT: s_add_i32 s0, s1, 4
|
||||
; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 15
|
||||
; GFX9-PAL-NEXT: scratch_store_dword off, v0, s0 offset:3712
|
||||
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
|
||||
@ -3872,8 +3883,10 @@ define void @store_load_large_imm_offset_foo() {
|
||||
; GFX10-PAL: ; %bb.0: ; %bb
|
||||
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-PAL-NEXT: v_mov_b32_e32 v0, 13
|
||||
; GFX10-PAL-NEXT: s_movk_i32 s0, 0x3800
|
||||
; GFX10-PAL-NEXT: v_mov_b32_e32 v1, 15
|
||||
; GFX10-PAL-NEXT: s_add_i32 s0, s32, 0x3804
|
||||
; GFX10-PAL-NEXT: s_add_i32 s1, s32, s0
|
||||
; GFX10-PAL-NEXT: s_add_i32 s0, s1, 4
|
||||
; GFX10-PAL-NEXT: scratch_store_dword off, v0, s32 offset:4
|
||||
; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-PAL-NEXT: scratch_store_dword off, v1, s0 offset:1664
|
||||
|
@ -75,7 +75,8 @@ stack:
|
||||
body: |
|
||||
bb.0:
|
||||
; CHECK-LABEL: name: fold_frame_index__s_add_i32__fi_materializedconst_0
|
||||
; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 %stack.0, 256, implicit-def $scc
|
||||
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 256
|
||||
; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 %stack.0, [[S_MOV_B32_]], implicit-def $scc
|
||||
; CHECK-NEXT: $sgpr4 = COPY [[S_ADD_I32_]]
|
||||
; CHECK-NEXT: SI_RETURN implicit $sgpr4
|
||||
%0:sreg_32 = S_MOV_B32 %stack.0
|
||||
|
@ -46,7 +46,8 @@ body: |
|
||||
%2:sreg_32 = S_LSHL2_ADD_U32 %0, %1, implicit-def $scc
|
||||
...
|
||||
# GCN-LABEL: name: test_frameindex{{$}}
|
||||
# GCN: %1:sreg_32 = S_ADD_I32 %stack.0, 70
|
||||
# GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 70
|
||||
# GCN-NEXT: %1:sreg_32 = S_ADD_I32 %stack.0, [[S_MOV_B32_]]
|
||||
---
|
||||
name: test_frameindex
|
||||
tracksRegLiveness: true
|
||||
|
@ -360,7 +360,8 @@ entry:
|
||||
; s_add_i32.
|
||||
|
||||
; GCN-LABEL: {{^}}fi_sop2_s_add_u32_literal_error:
|
||||
; GCN: s_add_u32 [[ADD_LO:s[0-9]+]], 0, 0x2010
|
||||
; GCN: s_movk_i32 [[S_MOVK_I32_:s[0-9]+]], 0x1000
|
||||
; GCN: s_add_u32 [[ADD_LO:s[0-9]+]], 0x1010, [[S_MOVK_I32_]]
|
||||
; GCN: s_addc_u32 [[ADD_HI:s[0-9]+]], s{{[0-9]+}}, 0
|
||||
define amdgpu_kernel void @fi_sop2_s_add_u32_literal_error() #0 {
|
||||
entry:
|
||||
|
@ -6,16 +6,24 @@ define amdgpu_gfx [13 x i32] @issue130120() {
|
||||
; CHECK: ; %bb.0: ; %bb
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 0
|
||||
; CHECK-NEXT: s_add_i32 s0, s32, 0xf0
|
||||
; CHECK-NEXT: s_add_i32 s1, s32, 0xf4
|
||||
; CHECK-NEXT: s_add_i32 s2, s32, 0xf8
|
||||
; CHECK-NEXT: s_add_i32 s3, s32, 0xfc
|
||||
; CHECK-NEXT: s_movk_i32 s1, 0xf4
|
||||
; CHECK-NEXT: s_movk_i32 s2, 0xf8
|
||||
; CHECK-NEXT: s_movk_i32 s3, 0xfc
|
||||
; CHECK-NEXT: s_movk_i32 s34, 0x100
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, v0
|
||||
; CHECK-NEXT: s_add_i32 s34, s32, 0x100
|
||||
; CHECK-NEXT: s_add_i32 s35, s32, 0x104
|
||||
; CHECK-NEXT: s_add_i32 s36, s32, 0x108
|
||||
; CHECK-NEXT: s_add_i32 s37, s32, 0x110
|
||||
; CHECK-NEXT: s_add_i32 s38, s32, 0x120
|
||||
; CHECK-NEXT: s_movk_i32 s35, 0x104
|
||||
; CHECK-NEXT: s_movk_i32 s36, 0x108
|
||||
; CHECK-NEXT: s_movk_i32 s37, 0x110
|
||||
; CHECK-NEXT: s_movk_i32 s38, 0x120
|
||||
; CHECK-NEXT: s_add_i32 s0, s32, 0xf0
|
||||
; CHECK-NEXT: s_add_i32 s1, s32, s1
|
||||
; CHECK-NEXT: s_add_i32 s2, s32, s2
|
||||
; CHECK-NEXT: s_add_i32 s3, s32, s3
|
||||
; CHECK-NEXT: s_add_i32 s34, s32, s34
|
||||
; CHECK-NEXT: s_add_i32 s35, s32, s35
|
||||
; CHECK-NEXT: s_add_i32 s36, s32, s36
|
||||
; CHECK-NEXT: s_add_i32 s37, s32, s37
|
||||
; CHECK-NEXT: s_add_i32 s38, s32, s38
|
||||
; CHECK-NEXT: s_or_b32 s39, s32, 4
|
||||
; CHECK-NEXT: s_or_b32 s40, s32, 8
|
||||
; CHECK-NEXT: s_or_b32 s41, s32, 12
|
||||
|
@ -74,7 +74,8 @@ define amdgpu_kernel void @local_stack_offset_uses_sp(ptr addrspace(1) %out) {
|
||||
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
|
||||
; FLATSCR-NEXT: s_cbranch_scc1 .LBB0_1
|
||||
; FLATSCR-NEXT: ; %bb.2: ; %split
|
||||
; FLATSCR-NEXT: s_movk_i32 s0, 0x5000
|
||||
; FLATSCR-NEXT: s_movk_i32 s0, 0x2000
|
||||
; FLATSCR-NEXT: s_addk_i32 s0, 0x3000
|
||||
; FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, s0 offset:208 glc
|
||||
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
|
||||
; FLATSCR-NEXT: s_movk_i32 s0, 0x3000
|
||||
@ -175,7 +176,9 @@ define void @func_local_stack_offset_uses_sp(ptr addrspace(1) %out) {
|
||||
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
|
||||
; FLATSCR-NEXT: s_cbranch_scc1 .LBB1_1
|
||||
; FLATSCR-NEXT: ; %bb.2: ; %split
|
||||
; FLATSCR-NEXT: s_add_i32 s0, s33, 0x5000
|
||||
; FLATSCR-NEXT: s_movk_i32 s0, 0x2000
|
||||
; FLATSCR-NEXT: s_add_i32 s1, s33, s0
|
||||
; FLATSCR-NEXT: s_add_i32 s0, s1, 0x3000
|
||||
; FLATSCR-NEXT: scratch_load_dwordx2 v[2:3], off, s0 offset:208 glc
|
||||
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
|
||||
; FLATSCR-NEXT: s_add_i32 s0, s33, 0x3000
|
||||
@ -223,30 +226,35 @@ define amdgpu_kernel void @local_stack_offset_uses_sp_flat(ptr addrspace(1) %out
|
||||
; MUBUF-NEXT: s_waitcnt vmcnt(0)
|
||||
; MUBUF-NEXT: s_cbranch_scc1 .LBB2_1
|
||||
; MUBUF-NEXT: ; %bb.2: ; %split
|
||||
; MUBUF-NEXT: s_movk_i32 s5, 0x12d4
|
||||
; MUBUF-NEXT: v_mov_b32_e32 v1, 0x4000
|
||||
; MUBUF-NEXT: v_or_b32_e32 v0, 0x12d4, v1
|
||||
; MUBUF-NEXT: v_or_b32_e32 v0, s5, v1
|
||||
; MUBUF-NEXT: s_movk_i32 s5, 0x12d0
|
||||
; MUBUF-NEXT: v_mov_b32_e32 v1, 0x4000
|
||||
; MUBUF-NEXT: s_movk_i32 s4, 0x4000
|
||||
; MUBUF-NEXT: buffer_load_dword v5, v0, s[0:3], 0 offen glc
|
||||
; MUBUF-NEXT: s_waitcnt vmcnt(0)
|
||||
; MUBUF-NEXT: v_or_b32_e32 v0, 0x12d0, v1
|
||||
; MUBUF-NEXT: v_or_b32_e32 v0, s5, v1
|
||||
; MUBUF-NEXT: s_movk_i32 s5, 0x12c4
|
||||
; MUBUF-NEXT: v_mov_b32_e32 v1, 0x4000
|
||||
; MUBUF-NEXT: s_or_b32 s4, s4, 0x12c0
|
||||
; MUBUF-NEXT: buffer_load_dword v4, v0, s[0:3], 0 offen glc
|
||||
; MUBUF-NEXT: s_waitcnt vmcnt(0)
|
||||
; MUBUF-NEXT: v_or_b32_e32 v0, 0x12c4, v1
|
||||
; MUBUF-NEXT: v_mov_b32_e32 v3, 0x4000
|
||||
; MUBUF-NEXT: v_or_b32_e32 v0, s5, v1
|
||||
; MUBUF-NEXT: buffer_load_dword v1, v0, s[0:3], 0 offen glc
|
||||
; MUBUF-NEXT: s_waitcnt vmcnt(0)
|
||||
; MUBUF-NEXT: v_mov_b32_e32 v0, s4
|
||||
; MUBUF-NEXT: v_or_b32_e32 v2, 0x12cc, v3
|
||||
; MUBUF-NEXT: s_movk_i32 s4, 0x12cc
|
||||
; MUBUF-NEXT: v_mov_b32_e32 v3, 0x4000
|
||||
; MUBUF-NEXT: v_or_b32_e32 v2, s4, v3
|
||||
; MUBUF-NEXT: s_movk_i32 s4, 0x12c8
|
||||
; MUBUF-NEXT: v_mov_b32_e32 v6, 0x4000
|
||||
; MUBUF-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen glc
|
||||
; MUBUF-NEXT: s_waitcnt vmcnt(0)
|
||||
; MUBUF-NEXT: v_mov_b32_e32 v7, 0x4000
|
||||
; MUBUF-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen glc
|
||||
; MUBUF-NEXT: s_waitcnt vmcnt(0)
|
||||
; MUBUF-NEXT: v_or_b32_e32 v2, 0x12c8, v6
|
||||
; MUBUF-NEXT: v_or_b32_e32 v2, s4, v6
|
||||
; MUBUF-NEXT: v_mov_b32_e32 v8, 0x4000
|
||||
; MUBUF-NEXT: v_mov_b32_e32 v9, 0x4000
|
||||
; MUBUF-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen glc
|
||||
@ -298,7 +306,8 @@ define amdgpu_kernel void @local_stack_offset_uses_sp_flat(ptr addrspace(1) %out
|
||||
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
|
||||
; FLATSCR-NEXT: s_cbranch_scc1 .LBB2_1
|
||||
; FLATSCR-NEXT: ; %bb.2: ; %split
|
||||
; FLATSCR-NEXT: s_movk_i32 s0, 0x3000
|
||||
; FLATSCR-NEXT: s_movk_i32 s0, 0x1000
|
||||
; FLATSCR-NEXT: s_addk_i32 s0, 0x2000
|
||||
; FLATSCR-NEXT: scratch_load_dwordx2 v[8:9], off, s0 offset:720 glc
|
||||
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
|
||||
; FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s0 offset:704 glc
|
||||
|
108
llvm/test/CodeGen/AMDGPU/no-folding-imm-to-inst-with-fi.ll
Normal file
108
llvm/test/CodeGen/AMDGPU/no-folding-imm-to-inst-with-fi.ll
Normal file
@ -0,0 +1,108 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
||||
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s
|
||||
|
||||
define protected amdgpu_kernel void @no_folding_imm_to_inst_with_fi(<4 x i64> %val4, <16 x i64> %val16) {
|
||||
; CHECK-LABEL: no_folding_imm_to_inst_with_fi:
|
||||
; CHECK: ; %bb.0: ; %bb
|
||||
; CHECK-NEXT: s_clause 0x2
|
||||
; CHECK-NEXT: s_load_b256 s[36:43], s[4:5], 0x24
|
||||
; CHECK-NEXT: s_load_b512 s[16:31], s[4:5], 0xe4
|
||||
; CHECK-NEXT: s_load_b512 s[0:15], s[4:5], 0xa4
|
||||
; CHECK-NEXT: s_mov_b64 s[34:35], src_private_base
|
||||
; CHECK-NEXT: s_movk_i32 s33, 0x70
|
||||
; CHECK-NEXT: s_movk_i32 s34, 0x60
|
||||
; CHECK-NEXT: s_or_b32 s44, 0x80, s33
|
||||
; CHECK-NEXT: s_mov_b32 s45, s35
|
||||
; CHECK-NEXT: s_or_b32 s46, 0x80, s34
|
||||
; CHECK-NEXT: s_mov_b32 s47, s35
|
||||
; CHECK-NEXT: v_dual_mov_b32 v20, s44 :: v_dual_mov_b32 v21, s45
|
||||
; CHECK-NEXT: v_dual_mov_b32 v22, s46 :: v_dual_mov_b32 v23, s47
|
||||
; CHECK-NEXT: s_movk_i32 s34, 0x80
|
||||
; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
||||
; CHECK-NEXT: v_dual_mov_b32 v34, s34 :: v_dual_mov_b32 v35, s35
|
||||
; CHECK-NEXT: s_wait_kmcnt 0x0
|
||||
; CHECK-NEXT: v_dual_mov_b32 v0, s40 :: v_dual_mov_b32 v1, s41
|
||||
; CHECK-NEXT: v_dual_mov_b32 v2, s42 :: v_dual_mov_b32 v3, s43
|
||||
; CHECK-NEXT: v_dual_mov_b32 v4, s36 :: v_dual_mov_b32 v5, s37
|
||||
; CHECK-NEXT: v_dual_mov_b32 v6, s38 :: v_dual_mov_b32 v7, s39
|
||||
; CHECK-NEXT: scratch_store_b128 off, v[0:3], off offset:16 scope:SCOPE_SYS
|
||||
; CHECK-NEXT: s_wait_storecnt 0x0
|
||||
; CHECK-NEXT: v_dual_mov_b32 v0, s20 :: v_dual_mov_b32 v1, s21
|
||||
; CHECK-NEXT: s_movk_i32 s20, 0x50
|
||||
; CHECK-NEXT: v_dual_mov_b32 v8, s28 :: v_dual_mov_b32 v9, s29
|
||||
; CHECK-NEXT: v_dual_mov_b32 v10, s30 :: v_dual_mov_b32 v11, s31
|
||||
; CHECK-NEXT: s_wait_alu 0xfffe
|
||||
; CHECK-NEXT: s_or_b32 s20, 0x80, s20
|
||||
; CHECK-NEXT: s_mov_b32 s21, s35
|
||||
; CHECK-NEXT: v_dual_mov_b32 v12, s24 :: v_dual_mov_b32 v13, s25
|
||||
; CHECK-NEXT: v_dual_mov_b32 v14, s26 :: v_dual_mov_b32 v15, s27
|
||||
; CHECK-NEXT: v_dual_mov_b32 v2, s22 :: v_dual_mov_b32 v3, s23
|
||||
; CHECK-NEXT: s_wait_alu 0xfffe
|
||||
; CHECK-NEXT: v_dual_mov_b32 v25, s21 :: v_dual_mov_b32 v24, s20
|
||||
; CHECK-NEXT: scratch_store_b128 off, v[4:7], off scope:SCOPE_SYS
|
||||
; CHECK-NEXT: s_wait_storecnt 0x0
|
||||
; CHECK-NEXT: flat_store_b128 v[20:21], v[8:11] scope:SCOPE_SYS
|
||||
; CHECK-NEXT: s_wait_storecnt 0x0
|
||||
; CHECK-NEXT: flat_store_b128 v[22:23], v[12:15] scope:SCOPE_SYS
|
||||
; CHECK-NEXT: s_wait_storecnt 0x0
|
||||
; CHECK-NEXT: flat_store_b128 v[24:25], v[0:3] scope:SCOPE_SYS
|
||||
; CHECK-NEXT: s_wait_storecnt 0x0
|
||||
; CHECK-NEXT: v_dual_mov_b32 v0, s16 :: v_dual_mov_b32 v1, s17
|
||||
; CHECK-NEXT: s_or_b32 s16, 0x80, 64
|
||||
; CHECK-NEXT: s_mov_b32 s17, s35
|
||||
; CHECK-NEXT: v_dual_mov_b32 v4, s12 :: v_dual_mov_b32 v5, s13
|
||||
; CHECK-NEXT: s_or_b32 s12, 0x80, 48
|
||||
; CHECK-NEXT: s_mov_b32 s13, s35
|
||||
; CHECK-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9
|
||||
; CHECK-NEXT: s_or_b32 s8, 0x80, 32
|
||||
; CHECK-NEXT: s_mov_b32 s9, s35
|
||||
; CHECK-NEXT: v_dual_mov_b32 v12, s4 :: v_dual_mov_b32 v13, s5
|
||||
; CHECK-NEXT: s_or_b32 s4, 0x80, 16
|
||||
; CHECK-NEXT: s_mov_b32 s5, s35
|
||||
; CHECK-NEXT: v_dual_mov_b32 v2, s18 :: v_dual_mov_b32 v3, s19
|
||||
; CHECK-NEXT: s_wait_alu 0xfffe
|
||||
; CHECK-NEXT: v_dual_mov_b32 v27, s17 :: v_dual_mov_b32 v26, s16
|
||||
; CHECK-NEXT: v_dual_mov_b32 v6, s14 :: v_dual_mov_b32 v7, s15
|
||||
; CHECK-NEXT: v_dual_mov_b32 v29, s13 :: v_dual_mov_b32 v28, s12
|
||||
; CHECK-NEXT: v_dual_mov_b32 v31, s9 :: v_dual_mov_b32 v30, s8
|
||||
; CHECK-NEXT: v_dual_mov_b32 v33, s5 :: v_dual_mov_b32 v32, s4
|
||||
; CHECK-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11
|
||||
; CHECK-NEXT: v_dual_mov_b32 v14, s6 :: v_dual_mov_b32 v15, s7
|
||||
; CHECK-NEXT: v_dual_mov_b32 v16, s0 :: v_dual_mov_b32 v17, s1
|
||||
; CHECK-NEXT: v_dual_mov_b32 v18, s2 :: v_dual_mov_b32 v19, s3
|
||||
; CHECK-NEXT: flat_store_b128 v[26:27], v[0:3] scope:SCOPE_SYS
|
||||
; CHECK-NEXT: s_wait_storecnt 0x0
|
||||
; CHECK-NEXT: flat_store_b128 v[28:29], v[4:7] scope:SCOPE_SYS
|
||||
; CHECK-NEXT: s_wait_storecnt 0x0
|
||||
; CHECK-NEXT: flat_store_b128 v[30:31], v[8:11] scope:SCOPE_SYS
|
||||
; CHECK-NEXT: s_wait_storecnt 0x0
|
||||
; CHECK-NEXT: flat_store_b128 v[32:33], v[12:15] scope:SCOPE_SYS
|
||||
; CHECK-NEXT: s_wait_storecnt 0x0
|
||||
; CHECK-NEXT: flat_store_b128 v[34:35], v[16:19] scope:SCOPE_SYS
|
||||
; CHECK-NEXT: s_wait_storecnt 0x0
|
||||
; CHECK-NEXT: flat_load_b128 v[0:3], v[22:23] scope:SCOPE_SYS
|
||||
; CHECK-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; CHECK-NEXT: flat_load_b128 v[0:3], v[20:21] scope:SCOPE_SYS
|
||||
; CHECK-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; CHECK-NEXT: flat_load_b128 v[0:3], v[26:27] scope:SCOPE_SYS
|
||||
; CHECK-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; CHECK-NEXT: flat_load_b128 v[0:3], v[24:25] scope:SCOPE_SYS
|
||||
; CHECK-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; CHECK-NEXT: flat_load_b128 v[0:3], v[30:31] scope:SCOPE_SYS
|
||||
; CHECK-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; CHECK-NEXT: flat_load_b128 v[0:3], v[28:29] scope:SCOPE_SYS
|
||||
; CHECK-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; CHECK-NEXT: flat_load_b128 v[0:3], v[34:35] scope:SCOPE_SYS
|
||||
; CHECK-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; CHECK-NEXT: flat_load_b128 v[0:3], v[32:33] scope:SCOPE_SYS
|
||||
; CHECK-NEXT: s_wait_loadcnt 0x0
|
||||
; CHECK-NEXT: s_endpgm
|
||||
bb:
|
||||
%alloca = alloca <4 x i64>, align 32, addrspace(5)
|
||||
%alloca1 = alloca <16 x i64>, align 128, addrspace(5)
|
||||
store volatile <4 x i64> %val4, ptr addrspace(5) %alloca
|
||||
%ascast = addrspacecast ptr addrspace(5) %alloca1 to ptr
|
||||
store volatile <16 x i64> %val16, ptr %ascast
|
||||
%load = load volatile <16 x i64>, ptr %ascast
|
||||
ret void
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user