
We weren't fully respecting the difference between the type of an immediate at its def and the type at its use point. Refactor the folding logic to track the value to fold together with a subregister to apply to that underlying value. This is similar to how PeepholeOpt tracks subregisters (though it does so only for pure copy-like instructions, not constants). Fixes #139317.
203 lines
9.6 KiB
YAML
203 lines
9.6 KiB
YAML
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
|
|
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -run-pass=si-fold-operands -o - %s | FileCheck %s
|
|
|
|
# Test behavior of folding into op_sel operands through reg_sequence
|
|
# with subregister uses
|
|
|
|
---
# The V_PK_ADD_F16 source reads %3.sub1, which the REG_SEQUENCE takes from
# %1 = S_MOV_B32 1107312640. The checks expect the operand to stay a register
# use ([[REG_SEQUENCE]].sub1) with the preceding operand 8 unchanged, i.e. no
# immediate is folded in this case.
|
|
name: issue139317
|
|
tracksRegLiveness: true
|
|
body: |
|
|
bb.0:
|
|
liveins: $vgpr0
|
|
|
|
; CHECK-LABEL: name: issue139317
|
|
; CHECK: liveins: $vgpr0
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
|
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1107312640
|
|
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1006632960
|
|
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE killed [[S_MOV_B32_1]], %subreg.sub0, killed [[S_MOV_B32_]], %subreg.sub1
|
|
; CHECK-NEXT: [[V_PK_ADD_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_ADD_F16 8, [[COPY]], 8, [[REG_SEQUENCE]].sub1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
|
|
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_PK_ADD_F16_]]
|
|
%0:vgpr_32 = COPY $vgpr0
|
|
%1:sreg_32 = S_MOV_B32 1107312640
|
|
%2:sreg_32 = S_MOV_B32 1006632960
|
|
%3:sgpr_64 = REG_SEQUENCE killed %2:sreg_32, %subreg.sub0, killed %1:sreg_32, %subreg.sub1
|
|
%4:vgpr_32 = nofpexcept V_PK_ADD_F16 8, %0:vgpr_32, 8, %3.sub1:sgpr_64, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
|
|
S_ENDPGM 0, implicit %4
|
|
...
|
|
|
|
---
# Same input as issue139317, but the use reads %3.sub0, which comes from
# %2 = S_MOV_B32 1006632960 (== 15360 << 16). The checks expect the source to
# be replaced by the immediate 15360, with the preceding operand changing
# from 8 to 4 (presumably the op_sel source modifier - confirm).
|
|
name: issue139317_sub0
|
|
tracksRegLiveness: true
|
|
body: |
|
|
bb.0:
|
|
liveins: $vgpr0
|
|
|
|
; CHECK-LABEL: name: issue139317_sub0
|
|
; CHECK: liveins: $vgpr0
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
|
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1107312640
|
|
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1006632960
|
|
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE killed [[S_MOV_B32_1]], %subreg.sub0, killed [[S_MOV_B32_]], %subreg.sub1
|
|
; CHECK-NEXT: [[V_PK_ADD_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_ADD_F16 8, [[COPY]], 4, 15360, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
|
|
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_PK_ADD_F16_]]
|
|
%0:vgpr_32 = COPY $vgpr0
|
|
%1:sreg_32 = S_MOV_B32 1107312640
|
|
%2:sreg_32 = S_MOV_B32 1006632960
|
|
%3:sgpr_64 = REG_SEQUENCE killed %2:sreg_32, %subreg.sub0, killed %1:sreg_32, %subreg.sub1
|
|
%4:vgpr_32 = nofpexcept V_PK_ADD_F16 8, %0:vgpr_32, 8, %3.sub0:sgpr_64, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
|
|
S_ENDPGM 0, implicit %4
|
|
...
|
|
|
|
---
# Two 64-bit REG_SEQUENCEs (%5, %6) are combined into a 128-bit REG_SEQUENCE
# (%7), and the use reads %7.sub1. Through %5 that lane comes from
# %1 = S_MOV_B32 1107312640. The checks expect the operand to remain
# [[REG_SEQUENCE2]].sub1 with the preceding operand 8 unchanged (no fold).
|
|
name: issue139317_nested_reg_sequence
|
|
tracksRegLiveness: true
|
|
body: |
|
|
bb.0:
|
|
liveins: $vgpr0
|
|
|
|
; CHECK-LABEL: name: issue139317_nested_reg_sequence
|
|
; CHECK: liveins: $vgpr0
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
|
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1107312640
|
|
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1006632960
|
|
; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 1191200256
|
|
; CHECK-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 1157645312
|
|
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE killed [[S_MOV_B32_1]], %subreg.sub0, killed [[S_MOV_B32_]], %subreg.sub1
|
|
; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE killed [[S_MOV_B32_2]], %subreg.sub0, killed [[S_MOV_B32_3]], %subreg.sub1
|
|
; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
|
|
; CHECK-NEXT: [[V_PK_ADD_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_ADD_F16 8, killed [[COPY]], 8, [[REG_SEQUENCE2]].sub1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
|
|
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_PK_ADD_F16_]]
|
|
%0:vgpr_32 = COPY $vgpr0
|
|
%1:sreg_32 = S_MOV_B32 1107312640
|
|
%2:sreg_32 = S_MOV_B32 1006632960
|
|
%3:sreg_32 = S_MOV_B32 1191200256
|
|
%4:sreg_32 = S_MOV_B32 1157645312
|
|
%5:sgpr_64 = REG_SEQUENCE killed %2, %subreg.sub0, killed %1, %subreg.sub1
|
|
%6:sgpr_64 = REG_SEQUENCE killed %3, %subreg.sub0, killed %4, %subreg.sub1
|
|
%7:sgpr_128 = REG_SEQUENCE %5, %subreg.sub0_sub1, %6, %subreg.sub2_sub3
|
|
%8:vgpr_32 = nofpexcept V_PK_ADD_F16 8, killed %0, 8, %7.sub1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
|
|
S_ENDPGM 0, implicit %8
|
|
|
|
...
|
|
|
|
---
# The sub1 lane of the REG_SEQUENCE comes from %1 = S_MOV_B32 -16, and the use
# reads %3.sub1. The checks expect the source to be replaced by the immediate
# -16 while the preceding operand stays 8.
|
|
name: issue139317_foldable_neg_imm
|
|
tracksRegLiveness: true
|
|
body: |
|
|
bb.0:
|
|
liveins: $vgpr0
|
|
|
|
; CHECK-LABEL: name: issue139317_foldable_neg_imm
|
|
; CHECK: liveins: $vgpr0
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
|
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -16
|
|
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1006632960
|
|
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE killed [[S_MOV_B32_1]], %subreg.sub0, killed [[S_MOV_B32_]], %subreg.sub1
|
|
; CHECK-NEXT: [[V_PK_ADD_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_ADD_F16 8, [[COPY]], 8, -16, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
|
|
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_PK_ADD_F16_]]
|
|
%0:vgpr_32 = COPY $vgpr0
|
|
%1:sreg_32 = S_MOV_B32 -16
|
|
%2:sreg_32 = S_MOV_B32 1006632960
|
|
%3:sgpr_64 = REG_SEQUENCE killed %2:sreg_32, %subreg.sub0, killed %1:sreg_32, %subreg.sub1
|
|
%4:vgpr_32 = nofpexcept V_PK_ADD_F16 8, %0:vgpr_32, 8, %3.sub1:sgpr_64, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
|
|
S_ENDPGM 0, implicit %4
|
|
...
|
|
|
|
---
# The sub1 lane of the REG_SEQUENCE comes from %1 = S_MOV_B32 15360, and the
# use reads %3.sub1. The checks expect the source to be replaced by the
# immediate 15360 while the preceding operand stays 8.
|
|
name: issue139317_foldable_fp_imm_0
|
|
tracksRegLiveness: true
|
|
body: |
|
|
bb.0:
|
|
liveins: $vgpr0
|
|
|
|
; CHECK-LABEL: name: issue139317_foldable_fp_imm_0
|
|
; CHECK: liveins: $vgpr0
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
|
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 15360
|
|
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1006632960
|
|
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE killed [[S_MOV_B32_1]], %subreg.sub0, killed [[S_MOV_B32_]], %subreg.sub1
|
|
; CHECK-NEXT: [[V_PK_ADD_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_ADD_F16 8, [[COPY]], 8, 15360, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
|
|
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_PK_ADD_F16_]]
|
|
%0:vgpr_32 = COPY $vgpr0
|
|
%1:sreg_32 = S_MOV_B32 15360
|
|
%2:sreg_32 = S_MOV_B32 1006632960
|
|
%3:sgpr_64 = REG_SEQUENCE killed %2:sreg_32, %subreg.sub0, killed %1:sreg_32, %subreg.sub1
|
|
%4:vgpr_32 = nofpexcept V_PK_ADD_F16 8, %0:vgpr_32, 8, %3.sub1:sgpr_64, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
|
|
S_ENDPGM 0, implicit %4
|
|
...
|
|
|
|
---
# Constants swapped relative to issue139317_foldable_fp_imm_0: the sub1 lane
# now comes from %1 = S_MOV_B32 1006632960 (== 15360 << 16). The checks expect
# the source to be replaced by the immediate 15360, with the preceding operand
# changing from 8 to 4 (presumably the op_sel source modifier - confirm).
|
|
name: issue139317_foldable_fp_imm_1
|
|
tracksRegLiveness: true
|
|
body: |
|
|
bb.0:
|
|
liveins: $vgpr0
|
|
|
|
; CHECK-LABEL: name: issue139317_foldable_fp_imm_1
|
|
; CHECK: liveins: $vgpr0
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
|
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1006632960
|
|
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 15360
|
|
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE killed [[S_MOV_B32_1]], %subreg.sub0, killed [[S_MOV_B32_]], %subreg.sub1
|
|
; CHECK-NEXT: [[V_PK_ADD_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_ADD_F16 8, [[COPY]], 4, 15360, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
|
|
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_PK_ADD_F16_]]
|
|
%0:vgpr_32 = COPY $vgpr0
|
|
%1:sreg_32 = S_MOV_B32 1006632960
|
|
%2:sreg_32 = S_MOV_B32 15360
|
|
%3:sgpr_64 = REG_SEQUENCE killed %2:sreg_32, %subreg.sub0, killed %1:sreg_32, %subreg.sub1
|
|
%4:vgpr_32 = nofpexcept V_PK_ADD_F16 8, %0:vgpr_32, 8, %3.sub1:sgpr_64, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
|
|
S_ENDPGM 0, implicit %4
|
|
...
|
|
|
|
---
# The REG_SEQUENCE inputs are subregister uses (%1.sub0, %1.sub1) of a single
# 64-bit S_MOV_B64_IMM_PSEUDO. The constant 4755871576254054400 equals
# (1107312640 << 32) | 1006632960, so the sub1 lane read by the use carries
# 1107312640. The checks expect the operand to remain [[REG_SEQUENCE]].sub1
# with the preceding operand 8 unchanged (no fold).
|
|
name: issue139317_reg_sequence_subreg_use
|
|
tracksRegLiveness: true
|
|
body: |
|
|
bb.0:
|
|
liveins: $vgpr0
|
|
|
|
; CHECK-LABEL: name: issue139317_reg_sequence_subreg_use
|
|
; CHECK: liveins: $vgpr0
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
|
; CHECK-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 4755871576254054400
|
|
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE killed [[S_MOV_B]].sub0, %subreg.sub0, [[S_MOV_B]].sub1, %subreg.sub1
|
|
; CHECK-NEXT: [[V_PK_ADD_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_ADD_F16 8, [[COPY]], 8, [[REG_SEQUENCE]].sub1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
|
|
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_PK_ADD_F16_]]
|
|
%0:vgpr_32 = COPY $vgpr0
|
|
%1:sreg_64 = S_MOV_B64_IMM_PSEUDO 4755871576254054400
|
|
%2:sgpr_64 = REG_SEQUENCE killed %1.sub0, %subreg.sub0, %1.sub1, %subreg.sub1
|
|
%3:vgpr_32 = nofpexcept V_PK_ADD_F16 8, %0:vgpr_32, 8, %2.sub1:sgpr_64, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
|
|
S_ENDPGM 0, implicit %3
|
|
...
|
|
|
|
---
# Same shape as issue139317_reg_sequence_subreg_use, with the 64-bit constant
# 65971704299520 == (15360 << 32) | 1006632960, so the sub1 lane read by the
# use carries 15360. The checks expect the source to be replaced by the
# immediate 15360 while the preceding operand stays 8.
|
|
name: issue139317_reg_sequence_subreg_use_foldable_imm
|
|
tracksRegLiveness: true
|
|
body: |
|
|
bb.0:
|
|
liveins: $vgpr0
|
|
|
|
; CHECK-LABEL: name: issue139317_reg_sequence_subreg_use_foldable_imm
|
|
; CHECK: liveins: $vgpr0
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
|
; CHECK-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 65971704299520
|
|
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE killed [[S_MOV_B]].sub0, %subreg.sub0, [[S_MOV_B]].sub1, %subreg.sub1
|
|
; CHECK-NEXT: [[V_PK_ADD_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_ADD_F16 8, [[COPY]], 8, 15360, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
|
|
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_PK_ADD_F16_]]
|
|
%0:vgpr_32 = COPY $vgpr0
|
|
%1:sreg_64 = S_MOV_B64_IMM_PSEUDO 65971704299520
|
|
%2:sgpr_64 = REG_SEQUENCE killed %1.sub0, %subreg.sub0, %1.sub1, %subreg.sub1
|
|
%3:vgpr_32 = nofpexcept V_PK_ADD_F16 8, %0:vgpr_32, 8, %2.sub1:sgpr_64, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
|
|
S_ENDPGM 0, implicit %3
|
|
...
|