
Pre-enable this optimization before allowing folds of frame indexes into add instructions. Disables this fold when using scratch instructions for now. I see some code size improvements with it, but the optimization needs to be smarter about the uses depending on the register classes.
81 lines
4.9 KiB
YAML
81 lines
4.9 KiB
YAML
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
|
|
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -run-pass=localstackalloc -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s
|
|
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -run-pass=localstackalloc -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX12 %s
|
|
|
|
---
|
|
name: local_stack_alloc__v_add_u32_e64__literal_offsets
|
|
tracksRegLiveness: true
|
|
stack:
|
|
- { id: 0, size: 4096, alignment: 4 }
|
|
machineFunctionInfo:
|
|
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
|
|
frameOffsetReg: '$sgpr33'
|
|
stackPtrOffsetReg: '$sgpr32'
|
|
body: |
|
|
bb.0:
|
|
; GFX10-LABEL: name: local_stack_alloc__v_add_u32_e64__literal_offsets
|
|
; GFX10: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 256
|
|
; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
|
|
; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[S_MOV_B32_]], [[V_MOV_B32_e32_]], 0, implicit $exec
|
|
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_U32_e64_]]
|
|
; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[COPY]]
|
|
; GFX10-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_ADD_U32_e64_]], 256, 0, implicit $exec
|
|
; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]]
|
|
; GFX10-NEXT: SI_RETURN
|
|
;
|
|
; GFX12-LABEL: name: local_stack_alloc__v_add_u32_e64__literal_offsets
|
|
; GFX12: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, 256, 0, implicit $exec
|
|
; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_]]
|
|
; GFX12-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, 512, 0, implicit $exec
|
|
; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]]
|
|
; GFX12-NEXT: SI_RETURN
|
|
%0:vgpr_32 = V_ADD_U32_e64 %stack.0, 256, 0, implicit $exec
|
|
INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, %0
|
|
%1:vgpr_32 = V_ADD_U32_e64 %stack.0, 512, 0, implicit $exec
|
|
INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, %1
|
|
SI_RETURN
|
|
|
|
...
|
|
|
|
---
|
|
name: local_stack_alloc__v_add_u32_e64__literal_offsets_commute
|
|
tracksRegLiveness: true
|
|
stack:
|
|
- { id: 0, size: 4096, alignment: 4 }
|
|
machineFunctionInfo:
|
|
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
|
|
frameOffsetReg: '$sgpr33'
|
|
stackPtrOffsetReg: '$sgpr32'
|
|
body: |
|
|
bb.0:
|
|
; GFX10-LABEL: name: local_stack_alloc__v_add_u32_e64__literal_offsets_commute
|
|
; GFX10: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 256
|
|
; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
|
|
; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[S_MOV_B32_]], [[V_MOV_B32_e32_]], 0, implicit $exec
|
|
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_U32_e64_]]
|
|
; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[COPY]]
|
|
; GFX10-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 256, [[V_ADD_U32_e64_]], 0, implicit $exec
|
|
; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]]
|
|
; GFX10-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_ADD_U32_e64_]], -156, 0, implicit $exec
|
|
; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_2]]
|
|
; GFX10-NEXT: SI_RETURN
|
|
;
|
|
; GFX12-LABEL: name: local_stack_alloc__v_add_u32_e64__literal_offsets_commute
|
|
; GFX12: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 256, %stack.0, 0, implicit $exec
|
|
; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_]]
|
|
; GFX12-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 512, %stack.0, 0, implicit $exec
|
|
; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]]
|
|
; GFX12-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, 100, 0, implicit $exec
|
|
; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_2]]
|
|
; GFX12-NEXT: SI_RETURN
|
|
%0:vgpr_32 = V_ADD_U32_e64 256, %stack.0, 0, implicit $exec
|
|
INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, %0
|
|
%1:vgpr_32 = V_ADD_U32_e64 512, %stack.0, 0, implicit $exec
|
|
INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, %1
|
|
%2:vgpr_32 = V_ADD_U32_e64 %stack.0, 100, 0, implicit $exec
|
|
INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, %2
|
|
SI_RETURN
|
|
|
|
...
|
|
|