[AMDGPU] Latency calculation must be independent of meta insts (#177052)
Debug and other meta instructions in bundles must not affect latency calculation. This ensures that code compiled with and without debug instructions is identical.
---------
Signed-off-by: John Lu <John.Lu@amd.com>
This commit is contained in:
parent
6a9699f194
commit
170de19a5b
@ -644,6 +644,8 @@ void GCNSubtarget::adjustSchedDependency(
|
||||
MachineBasicBlock::const_instr_iterator E(DefI->getParent()->instr_end());
|
||||
unsigned Lat = 0;
|
||||
for (++I; I != E && I->isBundledWithPred(); ++I) {
|
||||
if (I->isMetaInstruction())
|
||||
continue;
|
||||
if (I->modifiesRegister(Reg, TRI))
|
||||
Lat = InstrInfo.getInstrLatency(getInstrItineraryData(), *I);
|
||||
else if (Lat)
|
||||
@ -657,6 +659,8 @@ void GCNSubtarget::adjustSchedDependency(
|
||||
MachineBasicBlock::const_instr_iterator E(UseI->getParent()->instr_end());
|
||||
unsigned Lat = InstrInfo.getInstrLatency(getInstrItineraryData(), *DefI);
|
||||
for (++I; I != E && I->isBundledWithPred() && Lat; ++I) {
|
||||
if (I->isMetaInstruction())
|
||||
continue;
|
||||
if (I->readsRegister(Reg, TRI))
|
||||
break;
|
||||
--Lat;
|
||||
|
||||
@ -0,0 +1,44 @@
|
||||
; RUN: opt %s -strip-debug -o %t.no_debug.ll -S
|
||||
; RUN: llc -mcpu=gfx1250 < %s -filetype=obj -o %t.with_debug.o
|
||||
; RUN: llc -mcpu=gfx1250 < %t.no_debug.ll -filetype=obj -o %t.no_debug.o
|
||||
; RUN: llvm-strip %t.with_debug.o %t.no_debug.o
|
||||
; RUN: cmp %t.with_debug.o %t.no_debug.o
|
||||
; Ensure that compiling with and without debug info generates identical code.
|
||||
; Test that adjustSchedDependency does not count debug instructions in bundles.
|
||||
|
||||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
; Kernel under test: two #dbg_value records precede the address computation,
; a float load from addrspace(1), and a <2 x float> store to addrspace(3).
define amdgpu_kernel void @_test_adjustSchedDependency(ptr addrspace(1) %AA.coerce, i64 %shiftA, i32 %lda, ptr addrspace(3) %stPtr) !dbg !4 {
|
||||
entry:
|
||||
#dbg_value(i32 0, !10, !DIExpression(), !13)
|
||||
#dbg_value(ptr addrspace(1) %AA.coerce, !14, !DIExpression(), !13)
|
||||
%add.ptr1.i = getelementptr float, ptr addrspace(1) %AA.coerce, i64 %shiftA
|
||||
%mul15.13 = mul i32 %lda, 13
|
||||
%idxprom.13 = sext i32 %mul15.13 to i64
|
||||
%arrayidx.13 = getelementptr float, ptr addrspace(1) %add.ptr1.i, i64 %idxprom.13
|
||||
%floatval = load float, ptr addrspace(1) %arrayidx.13, align 4
|
||||
%floatpair = insertelement <2 x float> zeroinitializer, float %floatval, i64 0
|
||||
store <2 x float> %floatpair, ptr addrspace(3) %stPtr, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
!llvm.dbg.cu = !{!0}
|
||||
!llvm.module.flags = !{!3}
|
||||
|
||||
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "AMD clang version 22.0.0git (ssh://github-emu/AMD-Lightning-Internal/llvm-project 25425 c51a87b7a53a3e8f308402aaffa3ecbc2953305a)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !2, imports: !2, splitDebugInlining: false, nameTableKind: None)
|
||||
!1 = !DIFile(filename: "test.cpp", directory: "/tmp", checksumkind: CSK_MD5, checksum: "cc205700bf3536fe4ff21a07daf7e01d")
|
||||
!2 = !{}
|
||||
!3 = !{i32 2, !"Debug Info Version", i32 3}
|
||||
!4 = distinct !DISubprogram(name: "test_adjustSchedDependency", linkageName: "_test_adjustSchedDependency", scope: !6, file: !5, line: 142, type: !8, scopeLine: 150, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, templateParams: !2, retainedNodes: !2)
|
||||
!5 = !DIFile(filename: "kernels.hpp", directory: "/tmp")
|
||||
!6 = !DINamespace(name: "v33200", scope: !7, exportSymbols: true)
|
||||
!7 = !DINamespace(name: "solve", scope: null)
|
||||
!8 = distinct !DISubroutineType(types: !9)
|
||||
!9 = !{null}
|
||||
!10 = !DILocalVariable(name: "m", arg: 1, scope: !4, file: !5, line: 142, type: !11)
|
||||
!11 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !12)
|
||||
!12 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
|
||||
!13 = !DILocation(line: 0, scope: !4)
|
||||
!14 = !DILocalVariable(name: "AA", arg: 2, scope: !4, file: !5, line: 143, type: !15)
|
||||
!15 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !16, size: 64)
|
||||
!16 = !DIBasicType(name: "float", size: 32, encoding: DW_ATE_float)
|
||||
@ -15960,24 +15960,24 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 {
|
||||
; GFX10-NEXT: v_writelane_b32 v40, s22, 18
|
||||
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-NEXT: v_mov_b32_e32 v0, s52
|
||||
; GFX10-NEXT: v_mov_b32_e32 v1, s47
|
||||
; GFX10-NEXT: v_writelane_b32 v40, s23, 19
|
||||
; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24
|
||||
; GFX10-NEXT: v_mov_b32_e32 v0, s46
|
||||
; GFX10-NEXT: v_mov_b32_e32 v1, s47
|
||||
; GFX10-NEXT: v_mov_b32_e32 v2, s48
|
||||
; GFX10-NEXT: v_mov_b32_e32 v3, s49
|
||||
; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24
|
||||
; GFX10-NEXT: v_writelane_b32 v40, s24, 20
|
||||
; GFX10-NEXT: v_mov_b32_e32 v0, s46
|
||||
; GFX10-NEXT: s_mov_b32 s20, s36
|
||||
; GFX10-NEXT: s_mov_b32 s21, s37
|
||||
; GFX10-NEXT: s_mov_b32 s22, s38
|
||||
; GFX10-NEXT: s_mov_b32 s23, s39
|
||||
; GFX10-NEXT: v_writelane_b32 v40, s25, 21
|
||||
; GFX10-NEXT: s_mov_b32 s23, s39
|
||||
; GFX10-NEXT: s_mov_b32 s24, s40
|
||||
; GFX10-NEXT: s_mov_b32 s25, s41
|
||||
; GFX10-NEXT: v_mov_b32_e32 v4, s50
|
||||
; GFX10-NEXT: v_mov_b32_e32 v5, s51
|
||||
; GFX10-NEXT: v_writelane_b32 v40, s26, 22
|
||||
; GFX10-NEXT: s_mov_b32 s26, s42
|
||||
; GFX10-NEXT: v_mov_b32_e32 v5, s51
|
||||
; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32
|
||||
; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4
|
||||
; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8
|
||||
@ -16180,27 +16180,27 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 {
|
||||
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s22, 18
|
||||
; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v6, s2
|
||||
; GFX10-SCRATCH-NEXT: s_add_i32 s2, s32, 24
|
||||
; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, s50
|
||||
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s23, 19
|
||||
; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, s50
|
||||
; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, s51
|
||||
; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, s46
|
||||
; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, s47
|
||||
; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, s48
|
||||
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s24, 20
|
||||
; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, s48
|
||||
; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, s49
|
||||
; GFX10-SCRATCH-NEXT: s_add_i32 s2, s32, 24
|
||||
; GFX10-SCRATCH-NEXT: s_mov_b32 s20, s36
|
||||
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s25, 21
|
||||
; GFX10-SCRATCH-NEXT: s_mov_b32 s21, s37
|
||||
; GFX10-SCRATCH-NEXT: s_mov_b32 s22, s38
|
||||
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s25, 21
|
||||
; GFX10-SCRATCH-NEXT: s_mov_b32 s23, s39
|
||||
; GFX10-SCRATCH-NEXT: s_mov_b32 s24, s40
|
||||
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s26, 22
|
||||
; GFX10-SCRATCH-NEXT: s_mov_b32 s25, s41
|
||||
; GFX10-SCRATCH-NEXT: s_mov_b32 s26, s42
|
||||
; GFX10-SCRATCH-NEXT: scratch_store_dword off, v6, s2
|
||||
; GFX10-SCRATCH-NEXT: scratch_store_dwordx2 off, v[4:5], s3
|
||||
; GFX10-SCRATCH-NEXT: scratch_store_dwordx4 off, v[0:3], s32
|
||||
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s26, 22
|
||||
; GFX10-SCRATCH-NEXT: s_mov_b32 s26, s42
|
||||
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s27, 23
|
||||
; GFX10-SCRATCH-NEXT: s_mov_b32 s27, s43
|
||||
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s28, 24
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user