AMDGPU: Stop handling AGPR case in getCrossCopyRegClass (#161800)
This isn't what this hook is for. In the sense this hook is concerned with, you can copy between AGPRs. This change only affects some DAG scheduling decisions; later passes are responsible for dealing with the bad AGPR-to-AGPR copy handling.
This commit is contained in:
parent
f3a952311c
commit
48db3fd702
@ -1118,11 +1118,8 @@ SIRegisterInfo::getPointerRegClass(unsigned Kind) const {
|
||||
|
||||
// Maps the register class RC to the class this hook returns for it, as shown
// in this hunk:
//   - SCC_CLASS is mapped to the wave mask register class;
//   - any class not matched by a check below is returned unchanged.
// NOTE(review): this is a rendered diff whose +/- markers are not visible.
// Per the commit title and the hunk header (11 lines -> 8 lines), the AGPR
// check below appears to be the code this commit removes - confirm against
// the actual patch.
const TargetRegisterClass *
|
||||
SIRegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
|
||||
// AGPR classes on subtargets without GFX90A instructions are redirected to
// the equivalent VGPR class.
if (isAGPRClass(RC) && !ST.hasGFX90AInsts())
|
||||
return getEquivalentVGPRClass(RC);
|
||||
// SCC_CLASS is replaced by the wave mask register class.
if (RC == &AMDGPU::SCC_CLASSRegClass)
|
||||
return getWaveMaskRegClass();
|
||||
|
||||
// Everything else is returned as-is.
return RC;
|
||||
}
|
||||
|
||||
|
||||
@ -146,9 +146,9 @@ define void @no_free_vgprs_at_agpr_to_agpr_copy(float %v0, float %v1) #0 {
|
||||
; GFX908-NEXT: ;;#ASMSTART
|
||||
; GFX908-NEXT: ; copy
|
||||
; GFX908-NEXT: ;;#ASMEND
|
||||
; GFX908-NEXT: v_accvgpr_read_b32 v32, a2
|
||||
; GFX908-NEXT: v_accvgpr_read_b32 v39, a2
|
||||
; GFX908-NEXT: s_nop 1
|
||||
; GFX908-NEXT: v_accvgpr_write_b32 a3, v32
|
||||
; GFX908-NEXT: v_accvgpr_write_b32 a3, v39
|
||||
; GFX908-NEXT: ;;#ASMSTART
|
||||
; GFX908-NEXT: ; use a3 v[0:31]
|
||||
; GFX908-NEXT: ;;#ASMEND
|
||||
@ -437,9 +437,9 @@ define void @v32_asm_def_use(float %v0, float %v1) #4 {
|
||||
; GFX908-NEXT: ; copy
|
||||
; GFX908-NEXT: ;;#ASMEND
|
||||
; GFX908-NEXT: s_nop 7
|
||||
; GFX908-NEXT: v_accvgpr_read_b32 v33, a2
|
||||
; GFX908-NEXT: v_accvgpr_read_b32 v35, a2
|
||||
; GFX908-NEXT: s_nop 1
|
||||
; GFX908-NEXT: v_accvgpr_write_b32 a3, v33
|
||||
; GFX908-NEXT: v_accvgpr_write_b32 a3, v35
|
||||
; GFX908-NEXT: ;;#ASMSTART
|
||||
; GFX908-NEXT: ; use a3 v[0:31]
|
||||
; GFX908-NEXT: ;;#ASMEND
|
||||
@ -1045,9 +1045,9 @@ define void @no_free_vgprs_at_sgpr_to_agpr_copy(float %v0, float %v1) #0 {
|
||||
; GFX908-NEXT: ;;#ASMSTART
|
||||
; GFX908-NEXT: ; copy
|
||||
; GFX908-NEXT: ;;#ASMEND
|
||||
; GFX908-NEXT: v_accvgpr_read_b32 v32, a2
|
||||
; GFX908-NEXT: v_accvgpr_read_b32 v39, a2
|
||||
; GFX908-NEXT: s_nop 1
|
||||
; GFX908-NEXT: v_accvgpr_write_b32 a3, v32
|
||||
; GFX908-NEXT: v_accvgpr_write_b32 a3, v39
|
||||
; GFX908-NEXT: ;;#ASMSTART
|
||||
; GFX908-NEXT: ; use a3 v[0:31]
|
||||
; GFX908-NEXT: ;;#ASMEND
|
||||
|
||||
@ -40,8 +40,8 @@ body: |
|
||||
; GFX908: liveins: $agpr0
|
||||
; GFX908-NEXT: {{ $}}
|
||||
; GFX908-NEXT: renamable $vgpr0 = COPY renamable $agpr0, implicit $exec
|
||||
; GFX908-NEXT: renamable $agpr1 = COPY renamable $vgpr0, implicit $exec
|
||||
; GFX908-NEXT: renamable $agpr2 = COPY renamable $vgpr0, implicit $exec
|
||||
; GFX908-NEXT: renamable $agpr1 = COPY $agpr0, implicit $exec
|
||||
; GFX908-NEXT: renamable $agpr2 = COPY $agpr0, implicit $exec
|
||||
; GFX908-NEXT: S_ENDPGM 0, implicit $vgpr0, implicit $agpr1, implicit $agpr2
|
||||
;
|
||||
; GFX90A-LABEL: name: do_not_propagate_agpr_to_agpr
|
||||
|
||||
@ -95,66 +95,66 @@ define amdgpu_kernel void @test_mfma_f32_32x32x1f32(ptr addrspace(1) %arg) #0 {
|
||||
; GREEDY908-NEXT: v_mfma_f32_32x32x1f32 a[32:63], v3, v0, a[0:31]
|
||||
; GREEDY908-NEXT: s_nop 15
|
||||
; GREEDY908-NEXT: s_nop 1
|
||||
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a32
|
||||
; GREEDY908-NEXT: v_accvgpr_read_b32 v5, a61
|
||||
; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a60
|
||||
; GREEDY908-NEXT: v_accvgpr_write_b32 a2, v1
|
||||
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a33
|
||||
; GREEDY908-NEXT: v_accvgpr_read_b32 v7, a59
|
||||
; GREEDY908-NEXT: v_accvgpr_read_b32 v8, a58
|
||||
; GREEDY908-NEXT: v_accvgpr_write_b32 a3, v1
|
||||
; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a32
|
||||
; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a33
|
||||
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a34
|
||||
; GREEDY908-NEXT: v_accvgpr_read_b32 v9, a57
|
||||
; GREEDY908-NEXT: v_accvgpr_read_b32 v10, a56
|
||||
; GREEDY908-NEXT: v_accvgpr_write_b32 a2, v2
|
||||
; GREEDY908-NEXT: v_accvgpr_write_b32 a3, v6
|
||||
; GREEDY908-NEXT: v_accvgpr_write_b32 a4, v1
|
||||
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a35
|
||||
; GREEDY908-NEXT: v_accvgpr_read_b32 v11, a55
|
||||
; GREEDY908-NEXT: v_accvgpr_read_b32 v12, a54
|
||||
; GREEDY908-NEXT: v_accvgpr_write_b32 a5, v1
|
||||
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a36
|
||||
; GREEDY908-NEXT: v_accvgpr_read_b32 v13, a53
|
||||
; GREEDY908-NEXT: v_accvgpr_read_b32 v14, a52
|
||||
; GREEDY908-NEXT: v_accvgpr_write_b32 a6, v1
|
||||
; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a35
|
||||
; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a36
|
||||
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a37
|
||||
; GREEDY908-NEXT: v_accvgpr_read_b32 v15, a51
|
||||
; GREEDY908-NEXT: v_accvgpr_read_b32 v16, a50
|
||||
; GREEDY908-NEXT: v_accvgpr_write_b32 a5, v2
|
||||
; GREEDY908-NEXT: v_accvgpr_write_b32 a6, v6
|
||||
; GREEDY908-NEXT: v_accvgpr_write_b32 a7, v1
|
||||
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a38
|
||||
; GREEDY908-NEXT: v_accvgpr_read_b32 v17, a49
|
||||
; GREEDY908-NEXT: v_accvgpr_read_b32 v18, a48
|
||||
; GREEDY908-NEXT: v_accvgpr_write_b32 a8, v1
|
||||
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a39
|
||||
; GREEDY908-NEXT: v_accvgpr_read_b32 v19, a47
|
||||
; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a46
|
||||
; GREEDY908-NEXT: v_accvgpr_write_b32 a9, v1
|
||||
; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a38
|
||||
; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a39
|
||||
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a40
|
||||
; GREEDY908-NEXT: v_accvgpr_write_b32 a16, v2
|
||||
; GREEDY908-NEXT: v_accvgpr_write_b32 a17, v19
|
||||
; GREEDY908-NEXT: v_accvgpr_write_b32 a8, v2
|
||||
; GREEDY908-NEXT: v_accvgpr_write_b32 a9, v6
|
||||
; GREEDY908-NEXT: v_accvgpr_write_b32 a10, v1
|
||||
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a41
|
||||
; GREEDY908-NEXT: v_accvgpr_write_b32 a18, v18
|
||||
; GREEDY908-NEXT: v_accvgpr_write_b32 a19, v17
|
||||
; GREEDY908-NEXT: v_accvgpr_write_b32 a11, v1
|
||||
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a42
|
||||
; GREEDY908-NEXT: v_accvgpr_write_b32 a20, v16
|
||||
; GREEDY908-NEXT: v_accvgpr_write_b32 a21, v15
|
||||
; GREEDY908-NEXT: v_accvgpr_write_b32 a12, v1
|
||||
; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a41
|
||||
; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a42
|
||||
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a43
|
||||
; GREEDY908-NEXT: v_accvgpr_write_b32 a22, v14
|
||||
; GREEDY908-NEXT: v_accvgpr_write_b32 a23, v13
|
||||
; GREEDY908-NEXT: v_accvgpr_write_b32 a11, v2
|
||||
; GREEDY908-NEXT: v_accvgpr_write_b32 a12, v6
|
||||
; GREEDY908-NEXT: v_accvgpr_write_b32 a13, v1
|
||||
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a44
|
||||
; GREEDY908-NEXT: v_accvgpr_write_b32 a24, v12
|
||||
; GREEDY908-NEXT: v_accvgpr_write_b32 a25, v11
|
||||
; GREEDY908-NEXT: v_accvgpr_write_b32 a14, v1
|
||||
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a45
|
||||
; GREEDY908-NEXT: v_accvgpr_write_b32 a26, v10
|
||||
; GREEDY908-NEXT: v_accvgpr_write_b32 a27, v9
|
||||
; GREEDY908-NEXT: v_accvgpr_write_b32 a15, v1
|
||||
; GREEDY908-NEXT: v_accvgpr_write_b32 a28, v8
|
||||
; GREEDY908-NEXT: v_accvgpr_write_b32 a29, v7
|
||||
; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a44
|
||||
; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a45
|
||||
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a46
|
||||
; GREEDY908-NEXT: v_accvgpr_write_b32 a14, v2
|
||||
; GREEDY908-NEXT: v_accvgpr_write_b32 a15, v6
|
||||
; GREEDY908-NEXT: v_accvgpr_write_b32 a16, v1
|
||||
; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a47
|
||||
; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a48
|
||||
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a49
|
||||
; GREEDY908-NEXT: v_accvgpr_write_b32 a17, v2
|
||||
; GREEDY908-NEXT: v_accvgpr_write_b32 a18, v6
|
||||
; GREEDY908-NEXT: v_accvgpr_write_b32 a19, v1
|
||||
; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a50
|
||||
; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a51
|
||||
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a52
|
||||
; GREEDY908-NEXT: v_accvgpr_write_b32 a20, v2
|
||||
; GREEDY908-NEXT: v_accvgpr_write_b32 a21, v6
|
||||
; GREEDY908-NEXT: v_accvgpr_write_b32 a22, v1
|
||||
; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a53
|
||||
; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a54
|
||||
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a55
|
||||
; GREEDY908-NEXT: v_accvgpr_write_b32 a23, v2
|
||||
; GREEDY908-NEXT: v_accvgpr_write_b32 a24, v6
|
||||
; GREEDY908-NEXT: v_accvgpr_write_b32 a25, v1
|
||||
; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a56
|
||||
; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a57
|
||||
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a58
|
||||
; GREEDY908-NEXT: v_accvgpr_write_b32 a26, v2
|
||||
; GREEDY908-NEXT: v_accvgpr_write_b32 a27, v6
|
||||
; GREEDY908-NEXT: v_accvgpr_write_b32 a28, v1
|
||||
; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a59
|
||||
; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a60
|
||||
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a61
|
||||
; GREEDY908-NEXT: v_accvgpr_write_b32 a29, v2
|
||||
; GREEDY908-NEXT: v_accvgpr_write_b32 a30, v6
|
||||
; GREEDY908-NEXT: v_accvgpr_write_b32 a31, v5
|
||||
; GREEDY908-NEXT: v_accvgpr_write_b32 a31, v1
|
||||
; GREEDY908-NEXT: s_nop 0
|
||||
; GREEDY908-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v3, v0, a[0:31]
|
||||
; GREEDY908-NEXT: s_nop 15
|
||||
@ -667,11 +667,11 @@ define amdgpu_kernel void @test_mfma_f32_16x16x1f32(ptr addrspace(1) %arg) #0 {
|
||||
; GREEDY908-NEXT: v_mfma_f32_16x16x1f32 a[18:33], v0, v1, a[18:33]
|
||||
; GREEDY908-NEXT: v_mfma_f32_16x16x1f32 a[2:17], v0, v1, a[18:33]
|
||||
; GREEDY908-NEXT: s_nop 8
|
||||
; GREEDY908-NEXT: v_accvgpr_read_b32 v5, a18
|
||||
; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a19
|
||||
; GREEDY908-NEXT: v_accvgpr_read_b32 v3, a18
|
||||
; GREEDY908-NEXT: s_nop 0
|
||||
; GREEDY908-NEXT: v_accvgpr_write_b32 a0, v5
|
||||
; GREEDY908-NEXT: v_accvgpr_write_b32 a1, v2
|
||||
; GREEDY908-NEXT: v_accvgpr_write_b32 a0, v3
|
||||
; GREEDY908-NEXT: s_nop 0
|
||||
; GREEDY908-NEXT: v_mfma_f32_16x16x1f32 a[0:15], v0, v1, a[0:15]
|
||||
; GREEDY908-NEXT: s_nop 9
|
||||
|
||||
@ -54,19 +54,20 @@ define amdgpu_kernel void @matmul_kernel(i32 %a0, i32 %a1) {
|
||||
; GFX908-NEXT: s_branch .LBB0_2
|
||||
; GFX908-NEXT: .LBB0_1: ; %bb2
|
||||
; GFX908-NEXT: ; in Loop: Header=BB0_2 Depth=1
|
||||
; GFX908-NEXT: s_nop 6
|
||||
; GFX908-NEXT: v_accvgpr_read_b32 v3, a2
|
||||
; GFX908-NEXT: s_or_b32 s4, s3, 1
|
||||
; GFX908-NEXT: s_ashr_i32 s5, s3, 31
|
||||
; GFX908-NEXT: s_mov_b32 s3, s2
|
||||
; GFX908-NEXT: v_mov_b32_e32 v1, s2
|
||||
; GFX908-NEXT: s_nop 2
|
||||
; GFX908-NEXT: v_accvgpr_read_b32 v0, a2
|
||||
; GFX908-NEXT: v_mov_b32_e32 v2, s3
|
||||
; GFX908-NEXT: v_accvgpr_write_b32 a0, v3
|
||||
; GFX908-NEXT: v_accvgpr_read_b32 v4, a1
|
||||
; GFX908-NEXT: v_accvgpr_read_b32 v3, a1
|
||||
; GFX908-NEXT: v_accvgpr_write_b32 a0, v0
|
||||
; GFX908-NEXT: s_and_b32 s3, s5, s4
|
||||
; GFX908-NEXT: v_accvgpr_write_b32 a2, v4
|
||||
; GFX908-NEXT: v_accvgpr_write_b32 a3, v3
|
||||
; GFX908-NEXT: s_and_b32 s3, s5, s4
|
||||
; GFX908-NEXT: s_nop 0
|
||||
; GFX908-NEXT: v_mfma_f32_16x16x16f16 a[2:5], v[1:2], v[1:2], a[0:3]
|
||||
; GFX908-NEXT: s_cbranch_execz .LBB0_4
|
||||
; GFX908-NEXT: .LBB0_2: ; %bb
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user