[AMDGPU] Add missing physical register check in SIFoldOperands::tryFoldLoad
tryFoldLoad() is not meant to work on physical registers; moreover, calling use_nodbg_instructions(reg) with a physical register makes the compiler misbehave. Fix for SWDEV-373493. Reviewed By: arsenm. Differential Revision: https://reviews.llvm.org/D141895
This commit is contained in:
parent
01eb01c7fd
commit
2a832d0f09
@ -1706,6 +1706,9 @@ bool SIFoldOperands::tryFoldLoad(MachineInstr &MI) {
|
||||
if (!I->isCopy() && !I->isRegSequence())
|
||||
return false;
|
||||
Register DstReg = I->getOperand(0).getReg();
|
||||
// Physical registers may have more than one defining instruction
|
||||
if (DstReg.isPhysical())
|
||||
return false;
|
||||
if (TRI->isAGPR(*MRI, DstReg))
|
||||
continue;
|
||||
MoveRegs.push_back(DstReg);
|
||||
|
||||
44
llvm/test/CodeGen/AMDGPU/fold-vgpr-phyreg.mir
Normal file
44
llvm/test/CodeGen/AMDGPU/fold-vgpr-phyreg.mir
Normal file
@ -0,0 +1,44 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -o - %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -run-pass=si-fold-operands -verify-machineinstrs | FileCheck %s
|
||||
|
||||
---
|
||||
name: lshl_add_u64_gep
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
|
||||
|
||||
; CHECK-LABEL: name: lshl_add_u64_gep
|
||||
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
|
||||
; CHECK-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64_align2 = V_LSHLREV_B64_e64 2, [[REG_SEQUENCE]], implicit $exec
|
||||
; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[V_LSHLREV_B64_e64_]].sub0, 0, implicit $exec
|
||||
; CHECK-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY2]], [[V_LSHLREV_B64_e64_]].sub1, killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
|
||||
; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
|
||||
; CHECK-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD killed [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr
|
||||
; CHECK-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
|
||||
; CHECK-NEXT: SI_RETURN implicit $vgpr0
|
||||
%0:vgpr_32 = COPY $vgpr3
|
||||
%1:vgpr_32 = COPY $vgpr2
|
||||
%2:vgpr_32 = COPY $vgpr1
|
||||
%3:vgpr_32 = COPY $vgpr0
|
||||
%4:vreg_64_align2 = REG_SEQUENCE %1, %subreg.sub0, %0, %subreg.sub1
|
||||
%5:sreg_32 = S_MOV_B32 2
|
||||
%6:vreg_64_align2 = V_LSHLREV_B64_e64 killed %5, %4, implicit $exec
|
||||
%7:vgpr_32 = COPY %3
|
||||
%8:vgpr_32 = COPY %6.sub0
|
||||
%9:vgpr_32 = COPY %2
|
||||
%10:vgpr_32 = COPY %6.sub1
|
||||
%11:vgpr_32, %12:sreg_64_xexec = V_ADD_CO_U32_e64 %7, %8, 0, implicit $exec
|
||||
%13:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 %9, %10, killed %12, 0, implicit $exec
|
||||
%15:vreg_64_align2 = REG_SEQUENCE %11, %subreg.sub0, %13, %subreg.sub1
|
||||
%16:vgpr_32 = FLAT_LOAD_DWORD killed %15, 0, 0, implicit $exec, implicit $flat_scr
|
||||
$vgpr0 = COPY %16
|
||||
SI_RETURN implicit $vgpr0
|
||||
|
||||
...
|
||||
78
llvm/test/CodeGen/AMDGPU/swdev373493.ll
Normal file
78
llvm/test/CodeGen/AMDGPU/swdev373493.ll
Normal file
@ -0,0 +1,78 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -o - %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs | FileCheck %s
|
||||
|
||||
@global = external protected addrspace(4) externally_initialized global [4096 x i64], align 16
|
||||
|
||||
define hidden fastcc void @bar(i32 %arg, ptr %arg1, ptr %arg2, ptr %arg3, ptr %arg4, ptr %arg5, ptr %arg6) unnamed_addr align 2 {
|
||||
; CHECK-LABEL: bar:
|
||||
; CHECK: ; %bb.0: ; %bb
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: v_mov_b32_e32 v15, v12
|
||||
; CHECK-NEXT: v_mov_b32_e32 v14, v11
|
||||
; CHECK-NEXT: v_mov_b32_e32 v13, v10
|
||||
; CHECK-NEXT: v_mov_b32_e32 v12, v9
|
||||
; CHECK-NEXT: v_mov_b32_e32 v11, v8
|
||||
; CHECK-NEXT: v_mov_b32_e32 v10, v7
|
||||
; CHECK-NEXT: v_mov_b32_e32 v9, v6
|
||||
; CHECK-NEXT: v_mov_b32_e32 v8, v5
|
||||
; CHECK-NEXT: v_mov_b32_e32 v7, v4
|
||||
; CHECK-NEXT: v_mov_b32_e32 v6, v3
|
||||
; CHECK-NEXT: s_cmp_lt_i32 s4, 3
|
||||
; CHECK-NEXT: s_cbranch_scc0 .LBB0_3
|
||||
; CHECK-NEXT: ; %bb.1: ; %LeafBlock
|
||||
; CHECK-NEXT: s_cbranch_scc1 .LBB0_5
|
||||
; CHECK-NEXT: ; %bb.2: ; %bb7
|
||||
; CHECK-NEXT: flat_load_dwordx2 v[2:3], v[0:1]
|
||||
; CHECK-NEXT: s_getpc_b64 s[16:17]
|
||||
; CHECK-NEXT: s_add_u32 s16, s16, global@rel32@lo+1948
|
||||
; CHECK-NEXT: s_addc_u32 s17, s17, global@rel32@hi+1956
|
||||
; CHECK-NEXT: v_mov_b32_e32 v5, 0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, s16
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, s17
|
||||
; CHECK-NEXT: s_getpc_b64 s[18:19]
|
||||
; CHECK-NEXT: s_add_u32 s18, s18, eggs@rel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s19, s19, eggs@rel32@hi+12
|
||||
; CHECK-NEXT: s_setpc_b64 s[18:19]
|
||||
; CHECK-NEXT: .LBB0_3: ; %LeafBlock1
|
||||
; CHECK-NEXT: s_cbranch_scc0 .LBB0_5
|
||||
; CHECK-NEXT: ; %bb.4: ; %bb8
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, v1
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, v2
|
||||
; CHECK-NEXT: v_mov_b32_e32 v2, v6
|
||||
; CHECK-NEXT: v_mov_b32_e32 v3, v7
|
||||
; CHECK-NEXT: v_mov_b32_e32 v4, v8
|
||||
; CHECK-NEXT: v_mov_b32_e32 v5, v9
|
||||
; CHECK-NEXT: v_mov_b32_e32 v6, v10
|
||||
; CHECK-NEXT: v_mov_b32_e32 v7, v11
|
||||
; CHECK-NEXT: v_mov_b32_e32 v8, v12
|
||||
; CHECK-NEXT: v_mov_b32_e32 v9, v13
|
||||
; CHECK-NEXT: v_mov_b32_e32 v10, v14
|
||||
; CHECK-NEXT: v_mov_b32_e32 v11, v15
|
||||
; CHECK-NEXT: s_getpc_b64 s[16:17]
|
||||
; CHECK-NEXT: s_add_u32 s16, s16, quux@rel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s17, s17, quux@rel32@hi+12
|
||||
; CHECK-NEXT: s_setpc_b64 s[16:17]
|
||||
; CHECK-NEXT: .LBB0_5: ; %bb9
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
bb:
|
||||
switch i32 undef, label %bb9 [
|
||||
i32 3, label %bb8
|
||||
i32 1, label %bb7
|
||||
]
|
||||
|
||||
bb7: ; preds = %bb
|
||||
%tmp = load ptr, ptr undef, align 8
|
||||
tail call fastcc void @eggs(ptr noundef addrspacecast (ptr addrspace(4) getelementptr inbounds ([4096 x i64], ptr addrspace(4) @global, i64 0, i64 243) to ptr), ptr %tmp, ptr undef, ptr noundef nonnull align 8 dereferenceable(24) %arg2, ptr noundef %arg3, ptr noundef %arg4, ptr noundef %arg5)
|
||||
br label %bb9
|
||||
|
||||
bb8: ; preds = %bb
|
||||
tail call fastcc void @quux(ptr noundef nonnull align 8 dereferenceable(24) %arg1, ptr noundef nonnull align 8 dereferenceable(24) %arg2, ptr noundef %arg3, ptr noundef %arg4, ptr noundef %arg5, ptr noundef nonnull align 8 dereferenceable(8) %arg6)
|
||||
br label %bb9
|
||||
|
||||
bb9: ; preds = %bb8, %bb7, %bb
|
||||
ret void
|
||||
}
|
||||
|
||||
declare dso_local fastcc void @eggs(ptr, ptr, ptr, ptr, ptr, ptr, ptr) unnamed_addr align 2
|
||||
|
||||
declare dso_local fastcc void @quux(ptr, ptr, ptr, ptr, ptr, ptr) unnamed_addr align 2
|
||||
Loading…
x
Reference in New Issue
Block a user