[AMDGPU] 4-align TTMP triples (#132759)
Follow-up to e4284a7c70cd "[AMDGPU] 4-align SGPR triples". Previously, TTMP triples like ttmp[3:5] were aligned on a 3-TTMP boundary, which has no basis in hardware. Aligning them on a 4-TTMP boundary instead matches what we do for SGPRs and reduces the number of extra register classes synthesized by TableGen, bringing the total number down from 653 to 615.
This commit is contained in:
parent
34fa037c4f
commit
02ed65912e
@ -431,7 +431,7 @@ def TTMP_LO16 : SIRegisterClass<"AMDGPU", [i16, f16, bf16], 16,
|
||||
def TTMP_64Regs : SIRegisterTuples<getSubRegs<2>.ret, TTMP_32, 15, 2, 2, "ttmp">;
|
||||
|
||||
// Trap handler TMP 96-bit registers
|
||||
def TTMP_96Regs : SIRegisterTuples<getSubRegs<3>.ret, TTMP_32, 15, 3, 3, "ttmp">;
|
||||
def TTMP_96Regs : SIRegisterTuples<getSubRegs<3>.ret, TTMP_32, 15, 4, 3, "ttmp">;
|
||||
|
||||
// Trap handler TMP 128-bit registers
|
||||
def TTMP_128Regs : SIRegisterTuples<getSubRegs<4>.ret, TTMP_32, 15, 4, 4, "ttmp">;
|
||||
|
@ -129,13 +129,13 @@ body: |
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
|
||||
; CHECK-NEXT: undef [[COPY2:%[0-9]+]].sub0_sub1:areg_128 = COPY [[COPY]]
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]].sub2_sub3:areg_128 = COPY [[COPY1]]
|
||||
; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6225929 /* reguse:AReg_128 */, [[COPY2]]
|
||||
; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6029321 /* reguse:AReg_128 */, [[COPY2]]
|
||||
; CHECK-NEXT: SI_RETURN
|
||||
%0:vreg_64 = COPY $vgpr0_vgpr1
|
||||
%1:vreg_64 = COPY $vgpr2_vgpr3
|
||||
undef %2.sub0_sub1:areg_128 = COPY %0
|
||||
%2.sub2_sub3:areg_128 = COPY %1
|
||||
INLINEASM &"; use $0", 0 /* attdialect */, 6225929 /* reguse:AReg_128 */, killed %2
|
||||
INLINEASM &"; use $0", 0 /* attdialect */, 6029321 /* reguse:AReg_128 */, killed %2
|
||||
SI_RETURN
|
||||
|
||||
...
|
||||
@ -154,13 +154,13 @@ body: |
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
|
||||
; CHECK-NEXT: undef [[COPY2:%[0-9]+]].sub0_sub1:areg_128_align2 = COPY [[COPY]]
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]].sub2_sub3:areg_128_align2 = COPY [[COPY1]]
|
||||
; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6488073 /* reguse:AReg_128_Align2 */, [[COPY2]]
|
||||
; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6291465 /* reguse:AReg_128_Align2 */, [[COPY2]]
|
||||
; CHECK-NEXT: SI_RETURN
|
||||
%0:vreg_64 = COPY $vgpr0_vgpr1
|
||||
%1:vreg_64 = COPY $vgpr2_vgpr3
|
||||
undef %2.sub0_sub1:areg_128_align2 = COPY %0
|
||||
%2.sub2_sub3:areg_128_align2 = COPY %1
|
||||
INLINEASM &"; use $0", 0 /* attdialect */, 6488073 /* reguse:AReg_128_Align2 */, %2
|
||||
INLINEASM &"; use $0", 0 /* attdialect */, 6291465 /* reguse:AReg_128_Align2 */, %2
|
||||
SI_RETURN
|
||||
|
||||
...
|
||||
@ -399,14 +399,14 @@ body: |
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:areg_128 = COPY [[COPY]]
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]].sub2:areg_128 = COPY [[COPY]]
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]].sub3:areg_128 = COPY [[COPY]]
|
||||
; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6225929 /* reguse:AReg_128 */, [[COPY1]]
|
||||
; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6029321 /* reguse:AReg_128 */, [[COPY1]]
|
||||
; CHECK-NEXT: SI_RETURN
|
||||
%0:vgpr_32 = COPY $vgpr0
|
||||
undef %1.sub0:areg_128 = COPY %0
|
||||
%1.sub1:areg_128 = COPY %0
|
||||
%1.sub2:areg_128 = COPY %0
|
||||
%1.sub3:areg_128 = COPY %0
|
||||
INLINEASM &"; use $0", 0 /* attdialect */, 6225929 /* reguse:AReg_128 */, killed %1
|
||||
INLINEASM &"; use $0", 0 /* attdialect */, 6029321 /* reguse:AReg_128 */, killed %1
|
||||
SI_RETURN
|
||||
|
||||
...
|
||||
@ -426,14 +426,14 @@ body: |
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:areg_128_align2 = COPY [[COPY]]
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]].sub2:areg_128_align2 = COPY [[COPY]]
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]].sub3:areg_128_align2 = COPY [[COPY]]
|
||||
; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6488073 /* reguse:AReg_128_Align2 */, [[COPY1]]
|
||||
; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6291465 /* reguse:AReg_128_Align2 */, [[COPY1]]
|
||||
; CHECK-NEXT: SI_RETURN
|
||||
%0:vgpr_32 = COPY $vgpr0
|
||||
undef %1.sub0:areg_128_align2 = COPY %0
|
||||
%1.sub1:areg_128_align2 = COPY %0
|
||||
%1.sub2:areg_128_align2 = COPY %0
|
||||
%1.sub3:areg_128_align2 = COPY %0
|
||||
INLINEASM &"; use $0", 0 /* attdialect */, 6488073 /* reguse:AReg_128_Align2 */, %1
|
||||
INLINEASM &"; use $0", 0 /* attdialect */, 6291465 /* reguse:AReg_128_Align2 */, %1
|
||||
SI_RETURN
|
||||
|
||||
...
|
||||
|
@ -8,16 +8,16 @@
|
||||
define amdgpu_kernel void @s_input_output_i128() {
|
||||
; GFX908-LABEL: name: s_input_output_i128
|
||||
; GFX908: bb.0 (%ir-block.0):
|
||||
; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 7471114 /* regdef:SGPR_128 */, def %12
|
||||
; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 7208970 /* regdef:SGPR_128 */, def %12
|
||||
; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY %12
|
||||
; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 7471113 /* reguse:SGPR_128 */, [[COPY]]
|
||||
; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 7208969 /* reguse:SGPR_128 */, [[COPY]]
|
||||
; GFX908-NEXT: S_ENDPGM 0
|
||||
;
|
||||
; GFX90A-LABEL: name: s_input_output_i128
|
||||
; GFX90A: bb.0 (%ir-block.0):
|
||||
; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 7471114 /* regdef:SGPR_128 */, def %10
|
||||
; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 7208970 /* regdef:SGPR_128 */, def %10
|
||||
; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY %10
|
||||
; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 7471113 /* reguse:SGPR_128 */, [[COPY]]
|
||||
; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 7208969 /* reguse:SGPR_128 */, [[COPY]]
|
||||
; GFX90A-NEXT: S_ENDPGM 0
|
||||
%val = tail call i128 asm sideeffect "; def $0", "=s"()
|
||||
call void asm sideeffect "; use $0", "s"(i128 %val)
|
||||
@ -27,16 +27,16 @@ define amdgpu_kernel void @s_input_output_i128() {
|
||||
define amdgpu_kernel void @v_input_output_i128() {
|
||||
; GFX908-LABEL: name: v_input_output_i128
|
||||
; GFX908: bb.0 (%ir-block.0):
|
||||
; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6291466 /* regdef:VReg_128 */, def %12
|
||||
; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6094858 /* regdef:VReg_128 */, def %12
|
||||
; GFX908-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY %12
|
||||
; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6291465 /* reguse:VReg_128 */, [[COPY]]
|
||||
; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6094857 /* reguse:VReg_128 */, [[COPY]]
|
||||
; GFX908-NEXT: S_ENDPGM 0
|
||||
;
|
||||
; GFX90A-LABEL: name: v_input_output_i128
|
||||
; GFX90A: bb.0 (%ir-block.0):
|
||||
; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6619146 /* regdef:VReg_128_Align2 */, def %10
|
||||
; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6422538 /* regdef:VReg_128_Align2 */, def %10
|
||||
; GFX90A-NEXT: [[COPY:%[0-9]+]]:vreg_128_align2 = COPY %10
|
||||
; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6619145 /* reguse:VReg_128_Align2 */, [[COPY]]
|
||||
; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6422537 /* reguse:VReg_128_Align2 */, [[COPY]]
|
||||
; GFX90A-NEXT: S_ENDPGM 0
|
||||
%val = tail call i128 asm sideeffect "; def $0", "=v"()
|
||||
call void asm sideeffect "; use $0", "v"(i128 %val)
|
||||
@ -46,16 +46,16 @@ define amdgpu_kernel void @v_input_output_i128() {
|
||||
define amdgpu_kernel void @a_input_output_i128() {
|
||||
; GFX908-LABEL: name: a_input_output_i128
|
||||
; GFX908: bb.0 (%ir-block.0):
|
||||
; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6225930 /* regdef:AReg_128 */, def %12
|
||||
; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6029322 /* regdef:AReg_128 */, def %12
|
||||
; GFX908-NEXT: [[COPY:%[0-9]+]]:areg_128 = COPY %12
|
||||
; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6225929 /* reguse:AReg_128 */, [[COPY]]
|
||||
; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6029321 /* reguse:AReg_128 */, [[COPY]]
|
||||
; GFX908-NEXT: S_ENDPGM 0
|
||||
;
|
||||
; GFX90A-LABEL: name: a_input_output_i128
|
||||
; GFX90A: bb.0 (%ir-block.0):
|
||||
; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6488074 /* regdef:AReg_128_Align2 */, def %10
|
||||
; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6291466 /* regdef:AReg_128_Align2 */, def %10
|
||||
; GFX90A-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY %10
|
||||
; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6488073 /* reguse:AReg_128_Align2 */, [[COPY]]
|
||||
; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6291465 /* reguse:AReg_128_Align2 */, [[COPY]]
|
||||
; GFX90A-NEXT: S_ENDPGM 0
|
||||
%val = call i128 asm sideeffect "; def $0", "=a"()
|
||||
call void asm sideeffect "; use $0", "a"(i128 %val)
|
||||
|
@ -11,7 +11,7 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
|
||||
; REGALLOC-GFX908-NEXT: liveins: $sgpr4_sgpr5
|
||||
; REGALLOC-GFX908-NEXT: {{ $}}
|
||||
; REGALLOC-GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2162697 /* reguse:AGPR_32 */, undef %5:agpr_32
|
||||
; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6291466 /* regdef:VReg_128 */, def %6
|
||||
; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6094858 /* regdef:VReg_128 */, def %6
|
||||
; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3538954 /* regdef:VReg_64 */, def %7
|
||||
; REGALLOC-GFX908-NEXT: GLOBAL_STORE_DWORDX4 undef %14:vreg_64, %6, 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) poison`, addrspace 1)
|
||||
; REGALLOC-GFX908-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg.kernarg.offset1, addrspace 4)
|
||||
@ -20,8 +20,8 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
|
||||
; REGALLOC-GFX908-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
|
||||
; REGALLOC-GFX908-NEXT: [[V_MFMA_I32_4X4X4I8_e64_:%[0-9]+]]:areg_128 = V_MFMA_I32_4X4X4I8_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
|
||||
; REGALLOC-GFX908-NEXT: GLOBAL_STORE_DWORDX2 undef %16:vreg_64, %7, 0, 0, implicit $exec :: (volatile store (s64) into `ptr addrspace(1) poison`, addrspace 1)
|
||||
; REGALLOC-GFX908-NEXT: [[COPY3:%[0-9]+]]:vreg_128 = COPY [[V_MFMA_I32_4X4X4I8_e64_]]
|
||||
; REGALLOC-GFX908-NEXT: GLOBAL_STORE_DWORDX4 undef %18:vreg_64, [[COPY3]], 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) poison`, addrspace 1)
|
||||
; REGALLOC-GFX908-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY [[V_MFMA_I32_4X4X4I8_e64_]]
|
||||
; REGALLOC-GFX908-NEXT: GLOBAL_STORE_DWORDX4 undef %18:vreg_64, [[COPY1]], 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) poison`, addrspace 1)
|
||||
; REGALLOC-GFX908-NEXT: S_ENDPGM 0
|
||||
;
|
||||
; PEI-GFX908-LABEL: name: partial_copy
|
||||
@ -32,7 +32,7 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
|
||||
; PEI-GFX908-NEXT: $sgpr8 = S_ADD_U32 $sgpr8, $sgpr7, implicit-def $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
|
||||
; PEI-GFX908-NEXT: $sgpr9 = S_ADDC_U32 $sgpr9, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
|
||||
; PEI-GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2162697 /* reguse:AGPR_32 */, undef renamable $agpr0
|
||||
; PEI-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6291466 /* regdef:VReg_128 */, def renamable $vgpr0_vgpr1_vgpr2_vgpr3
|
||||
; PEI-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6094858 /* regdef:VReg_128 */, def renamable $vgpr0_vgpr1_vgpr2_vgpr3
|
||||
; PEI-GFX908-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = COPY killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, implicit $exec
|
||||
; PEI-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3538954 /* regdef:VReg_64 */, def renamable $vgpr0_vgpr1
|
||||
; PEI-GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5)
|
||||
@ -56,7 +56,7 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
|
||||
; REGALLOC-GFX90A-NEXT: liveins: $sgpr4_sgpr5
|
||||
; REGALLOC-GFX90A-NEXT: {{ $}}
|
||||
; REGALLOC-GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2162697 /* reguse:AGPR_32 */, undef %5:agpr_32
|
||||
; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6619146 /* regdef:VReg_128_Align2 */, def %6
|
||||
; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6422538 /* regdef:VReg_128_Align2 */, def %6
|
||||
; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3866634 /* regdef:VReg_64_Align2 */, def %7
|
||||
; REGALLOC-GFX90A-NEXT: GLOBAL_STORE_DWORDX4 undef %14:vreg_64_align2, %6, 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) poison`, addrspace 1)
|
||||
; REGALLOC-GFX90A-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg.kernarg.offset1, addrspace 4)
|
||||
@ -76,7 +76,7 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
|
||||
; PEI-GFX90A-NEXT: $sgpr8 = S_ADD_U32 $sgpr8, $sgpr7, implicit-def $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
|
||||
; PEI-GFX90A-NEXT: $sgpr9 = S_ADDC_U32 $sgpr9, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
|
||||
; PEI-GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2162697 /* reguse:AGPR_32 */, undef renamable $agpr0
|
||||
; PEI-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6619146 /* regdef:VReg_128_Align2 */, def renamable $vgpr0_vgpr1_vgpr2_vgpr3
|
||||
; PEI-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6422538 /* regdef:VReg_128_Align2 */, def renamable $vgpr0_vgpr1_vgpr2_vgpr3
|
||||
; PEI-GFX90A-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = COPY killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, implicit $exec
|
||||
; PEI-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3866634 /* regdef:VReg_64_Align2 */, def renamable $vgpr0_vgpr1
|
||||
; PEI-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5)
|
||||
|
Loading…
x
Reference in New Issue
Block a user