The mov64 pseudo is split into two 32-bit movs, but those 32-bit movs still had the full 64-bit register implicitly defined. VOPD formation is affected, so more VOPD instructions can be emitted.
31 lines
1.1 KiB
YAML
31 lines
1.1 KiB
YAML
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -start-after=si-load-store-opt %s -o - | FileCheck %s

# Codegen test for a 96-bit, 2-aligned REG_SEQUENCE whose upper 64 bits
# (sub1_sub2) come from a V_MOV_B64_PSEUDO of zero. The expected assembly
# below materializes the three stored dwords with separate 32-bit moves
# (v4, v5, v6), while the 64-bit address copy is a single v_mov_b64.

# CHECK: misaligned_regsequence:
# CHECK: ; %bb.0:
# CHECK: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
# CHECK: s_load_dwordx2 s[0:1], s[4:5], 0x0
# CHECK: v_mov_b32_e32 v4, 0
# CHECK: v_mov_b32_e32 v5, 0
# CHECK: v_mov_b32_e32 v6, 0
# CHECK: s_waitcnt lgkmcnt(0)
# CHECK: v_mov_b64_e32 v[2:3], s[0:1]
# CHECK: flat_store_dwordx3 v[2:3], v[4:6]
# CHECK: s_endpgm

---
name:            misaligned_regsequence
tracksRegLiveness: true
body:             |
  bb.0:
    liveins: $sgpr4_sgpr5

    %0:sgpr_64 = COPY $sgpr4_sgpr5
    %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 0, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %3:vreg_64_align2 = COPY %1
    %4:vreg_64_align2 = V_MOV_B64_PSEUDO 0, implicit $exec
    %5:vreg_96_align2 = REG_SEQUENCE killed %2, %subreg.sub0, killed %4, %subreg.sub1_sub2
    FLAT_STORE_DWORDX3 %3, killed %5, 0, 0, implicit $exec, implicit $flat_scr :: (store (s96), align 4)
    S_ENDPGM 0
...