The LocalStackSlotAllocation pass disallows negative offsets with respect to a base register, so it ends up introducing a new register for such frame references. This patch makes LocalStackSlotAllocation additionally consider the immediate offset of an instruction when sorting frame references, thereby avoiding negative offsets and maximizing reuse of the existing registers.
81 lines
4.9 KiB
YAML
81 lines
4.9 KiB
YAML
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
|
|
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -run-pass=localstackalloc -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s
|
|
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -run-pass=localstackalloc -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX12 %s
|
|
|
|
---
# Input: two V_ADD_U32_e64 instructions that take %stack.0 as their first
# source and a literal (256, then 512) as their second; each result feeds an
# INLINEASM use.
# GFX10 checks: a single base value (S_MOV_B32 256 added to a V_MOV_B32 of
# %stack.0) is expected; the first add is replaced by a COPY of that base and
# the second add becomes base + 256.
# GFX12 checks: both adds are expected to remain exactly as written in the
# input.
|
|
name: local_stack_alloc__v_add_u32_e64__literal_offsets
|
|
tracksRegLiveness: true
|
|
stack:
|
|
- { id: 0, size: 4096, alignment: 4 }
|
|
machineFunctionInfo:
|
|
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
|
|
frameOffsetReg: '$sgpr33'
|
|
stackPtrOffsetReg: '$sgpr32'
|
|
body: |
|
|
bb.0:
|
|
; GFX10-LABEL: name: local_stack_alloc__v_add_u32_e64__literal_offsets
|
|
; GFX10: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 256
|
|
; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
|
|
; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[S_MOV_B32_]], [[V_MOV_B32_e32_]], 0, implicit $exec
|
|
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_U32_e64_]]
|
|
; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1245193 /* reguse:VGPR_32 */, [[COPY]]
|
|
; GFX10-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_ADD_U32_e64_]], 256, 0, implicit $exec
|
|
; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1245193 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]]
|
|
; GFX10-NEXT: SI_RETURN
|
|
;
|
|
; GFX12-LABEL: name: local_stack_alloc__v_add_u32_e64__literal_offsets
|
|
; GFX12: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, 256, 0, implicit $exec
|
|
; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1245193 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_]]
|
|
; GFX12-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, 512, 0, implicit $exec
|
|
; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1245193 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]]
|
|
; GFX12-NEXT: SI_RETURN
|
|
%0:vgpr_32 = V_ADD_U32_e64 %stack.0, 256, 0, implicit $exec
|
|
INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1245193 /* reguse:VGPR_32 */, %0
|
|
%1:vgpr_32 = V_ADD_U32_e64 %stack.0, 512, 0, implicit $exec
|
|
INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1245193 /* reguse:VGPR_32 */, %1
|
|
SI_RETURN
|
|
|
|
...
|
|
|
|
---
# Input: three V_ADD_U32_e64 instructions on %stack.0 with mixed operand
# order: the literals 256 and 512 appear as the first source, while 100 appears
# as the second source. Each result feeds an INLINEASM use.
# GFX10 checks: the base value is expected at the smallest of the three
# literals (S_MOV_B32 100 added to a V_MOV_B32 of %stack.0); the 256 and 512
# adds become 156 + base and 412 + base, and the offset-100 add is replaced by
# a COPY of the base.
# GFX12 checks: all three adds are expected to remain exactly as written in
# the input.
|
|
name: local_stack_alloc__v_add_u32_e64__literal_offsets_commute
|
|
tracksRegLiveness: true
|
|
stack:
|
|
- { id: 0, size: 4096, alignment: 4 }
|
|
machineFunctionInfo:
|
|
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
|
|
frameOffsetReg: '$sgpr33'
|
|
stackPtrOffsetReg: '$sgpr32'
|
|
body: |
|
|
bb.0:
|
|
; GFX10-LABEL: name: local_stack_alloc__v_add_u32_e64__literal_offsets_commute
|
|
; GFX10: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 100
|
|
; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
|
|
; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[S_MOV_B32_]], [[V_MOV_B32_e32_]], 0, implicit $exec
|
|
; GFX10-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 156, [[V_ADD_U32_e64_]], 0, implicit $exec
|
|
; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1245193 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]]
|
|
; GFX10-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 412, [[V_ADD_U32_e64_]], 0, implicit $exec
|
|
; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1245193 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_2]]
|
|
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_U32_e64_]]
|
|
; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1245193 /* reguse:VGPR_32 */, [[COPY]]
|
|
; GFX10-NEXT: SI_RETURN
|
|
;
|
|
; GFX12-LABEL: name: local_stack_alloc__v_add_u32_e64__literal_offsets_commute
|
|
; GFX12: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 256, %stack.0, 0, implicit $exec
|
|
; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1245193 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_]]
|
|
; GFX12-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 512, %stack.0, 0, implicit $exec
|
|
; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1245193 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]]
|
|
; GFX12-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, 100, 0, implicit $exec
|
|
; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1245193 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_2]]
|
|
; GFX12-NEXT: SI_RETURN
|
|
%0:vgpr_32 = V_ADD_U32_e64 256, %stack.0, 0, implicit $exec
|
|
INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1245193 /* reguse:VGPR_32 */, %0
|
|
%1:vgpr_32 = V_ADD_U32_e64 512, %stack.0, 0, implicit $exec
|
|
INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1245193 /* reguse:VGPR_32 */, %1
|
|
%2:vgpr_32 = V_ADD_U32_e64 %stack.0, 100, 0, implicit $exec
|
|
INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1245193 /* reguse:VGPR_32 */, %2
|
|
SI_RETURN
|
|
|
|
...
|
|
|