llvm-project/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
Matt Arsenault 4ec890857d
AMDGPU: Try to constrain av registers to VGPR to enable ds_write2 formation (#156400)
In future changes we will have more AV_ virtual registers, which
currently
block the formation of write2. Most of the time these registers can
simply
be constrained to VGPR, so do that.

Also relaxes the constraint in flat merging case. We already have the
necessary
code to insert copies to the original result registers, so there's no
point
in avoiding it.

Addresses the easy half of #155769
2025-09-03 00:48:21 +00:00

211 lines
8.3 KiB
YAML

# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -run-pass=si-load-store-opt -o - %s | FileCheck %s
---
name: ds_write_b32__av32_x2
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $vgpr2
; CHECK-LABEL: name: ds_write_b32__av32_x2
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK-NEXT: DS_WRITE2_B32_gfx9 [[COPY]], [[COPY1]], [[COPY2]], 10, 24, 0, implicit $exec :: (store (s32), addrspace 3)
%0:vgpr_32 = COPY $vgpr0
%1:av_32 = COPY $vgpr1
%2:av_32 = COPY $vgpr2
DS_WRITE_B32_gfx9 %0, %1, 40, 0, implicit $exec :: (store (s32), addrspace 3)
DS_WRITE_B32_gfx9 %0, %2, 96, 0, implicit $exec :: (store (s32), addrspace 3)
...
---
name: ds_write_b32__av32_x2_subregs_different_reg
body: |
bb.0:
liveins: $vgpr0, $vgpr2_vgpr3, $vgpr4_vgpr5
; CHECK-LABEL: name: ds_write_b32__av32_x2_subregs_different_reg
; CHECK: liveins: $vgpr0, $vgpr2_vgpr3, $vgpr4_vgpr5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_64_align2 = COPY $vgpr2_vgpr3
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_64_align2 = COPY $vgpr4_vgpr5
; CHECK-NEXT: DS_WRITE2_B32_gfx9 [[COPY]], [[COPY1]].sub0, [[COPY2]].sub1, 10, 24, 0, implicit $exec :: (store (s32), addrspace 3)
%0:vgpr_32 = COPY $vgpr0
%1:av_64_align2 = COPY $vgpr2_vgpr3
%2:av_64_align2 = COPY $vgpr4_vgpr5
DS_WRITE_B32_gfx9 %0, %1.sub0, 40, 0, implicit $exec :: (store (s32), addrspace 3)
DS_WRITE_B32_gfx9 %0, %2.sub1, 96, 0, implicit $exec :: (store (s32), addrspace 3)
...
---
name: ds_write_b32__unaligned_av64_subregs
body: |
bb.0:
liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4
; CHECK-LABEL: name: ds_write_b32__unaligned_av64_subregs
; CHECK: liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4
; CHECK-NEXT: DS_WRITE2_B32_gfx9 [[COPY]], [[COPY1]].sub0, [[COPY2]].sub1, 10, 24, 0, implicit $exec :: (store (s32), addrspace 3)
%0:vgpr_32 = COPY $vgpr0
%1:av_64 = COPY $vgpr1_vgpr2
%2:av_64 = COPY $vgpr3_vgpr4
DS_WRITE_B32_gfx9 %0, %1.sub0, 40, 0, implicit $exec :: (store (s32), addrspace 3)
DS_WRITE_B32_gfx9 %0, %2.sub1, 96, 0, implicit $exec :: (store (s32), addrspace 3)
...
---
name: ds_write_b32__av32_x2_subregs_same_reg
body: |
bb.0:
liveins: $vgpr0, $vgpr2_vgpr3, $vgpr4_vgpr5
; CHECK-LABEL: name: ds_write_b32__av32_x2_subregs_same_reg
; CHECK: liveins: $vgpr0, $vgpr2_vgpr3, $vgpr4_vgpr5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_64_align2 = COPY $vgpr2_vgpr3
; CHECK-NEXT: DS_WRITE2_B32_gfx9 [[COPY]], [[COPY1]].sub0, [[COPY1]].sub1, 10, 24, 0, implicit $exec :: (store (s32), addrspace 3)
%0:vgpr_32 = COPY $vgpr0
%1:av_64_align2 = COPY $vgpr2_vgpr3
DS_WRITE_B32_gfx9 %0, %1.sub0, 40, 0, implicit $exec :: (store (s32), addrspace 3)
DS_WRITE_B32_gfx9 %0, %1.sub1, 96, 0, implicit $exec :: (store (s32), addrspace 3)
...
---
name: ds_write_b32__av32__vgpr32
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $vgpr2
; CHECK-LABEL: name: ds_write_b32__av32__vgpr32
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK-NEXT: DS_WRITE2_B32_gfx9 [[COPY]], [[COPY1]], [[COPY2]], 10, 24, 0, implicit $exec :: (store (s32), addrspace 3)
%0:vgpr_32 = COPY $vgpr0
%1:av_32 = COPY $vgpr1
%2:vgpr_32 = COPY $vgpr2
DS_WRITE_B32_gfx9 %0, %1, 40, 0, implicit $exec :: (store (s32), addrspace 3)
DS_WRITE_B32_gfx9 %0, %2, 96, 0, implicit $exec :: (store (s32), addrspace 3)
...
---
name: ds_write_b32__vgpr32__av32
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $vgpr2
; CHECK-LABEL: name: ds_write_b32__vgpr32__av32
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK-NEXT: DS_WRITE2_B32_gfx9 [[COPY]], [[COPY1]], [[COPY2]], 10, 24, 0, implicit $exec :: (store (s32), addrspace 3)
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = COPY $vgpr1
%2:av_32 = COPY $vgpr2
DS_WRITE_B32_gfx9 %0, %1, 40, 0, implicit $exec :: (store (s32), addrspace 3)
DS_WRITE_B32_gfx9 %0, %2, 96, 0, implicit $exec :: (store (s32), addrspace 3)
...
---
name: ds_write_b64__av64_x2
body: |
bb.0:
liveins: $vgpr0, $vgpr2_vgpr3, $vgpr4_vgpr5
; CHECK-LABEL: name: ds_write_b64__av64_x2
; CHECK: liveins: $vgpr0, $vgpr2_vgpr3, $vgpr4_vgpr5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_64_align2 = COPY $vgpr2_vgpr3
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_64_align2 = COPY $vgpr4_vgpr5
; CHECK-NEXT: DS_WRITE2_B64_gfx9 [[COPY]], [[COPY1]], [[COPY2]], 5, 12, 0, implicit $exec :: (store (s64), addrspace 3)
%0:vgpr_32 = COPY $vgpr0
%1:av_64_align2 = COPY $vgpr2_vgpr3
%2:av_64_align2 = COPY $vgpr4_vgpr5
DS_WRITE_B64_gfx9 %0, %1, 40, 0, implicit $exec :: (store (s64), addrspace 3)
DS_WRITE_B64_gfx9 %0, %2, 96, 0, implicit $exec :: (store (s64), addrspace 3)
...
---
name: ds_write_b64__av64_x2_subregs
body: |
bb.0:
liveins: $vgpr0, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr6_vgpr7_vgpr8_vgpr9
; CHECK-LABEL: name: ds_write_b64__av64_x2_subregs
; CHECK: liveins: $vgpr0, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr6_vgpr7_vgpr8_vgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY $vgpr6_vgpr7_vgpr8_vgpr9
; CHECK-NEXT: DS_WRITE2_B64_gfx9 [[COPY]], [[COPY1]].sub2_sub3, [[COPY2]].sub2_sub3, 5, 12, 0, implicit $exec :: (store (s64), addrspace 3)
%0:vgpr_32 = COPY $vgpr0
%1:av_128_align2 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
%2:av_128_align2 = COPY $vgpr6_vgpr7_vgpr8_vgpr9
DS_WRITE_B64_gfx9 %0, %1.sub2_sub3, 40, 0, implicit $exec :: (store (s64), addrspace 3)
DS_WRITE_B64_gfx9 %0, %2.sub2_sub3, 96, 0, implicit $exec :: (store (s64), addrspace 3)
...
---
name: ds_writest64_b32__av32_x2
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $vgpr2
; CHECK-LABEL: name: ds_writest64_b32__av32_x2
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK-NEXT: DS_WRITE2ST64_B32_gfx9 [[COPY]], [[COPY1]], [[COPY2]], 1, 3, 0, implicit $exec :: (store (s32), addrspace 3)
%0:vgpr_32 = COPY $vgpr0
%1:av_32 = COPY $vgpr1
%2:av_32 = COPY $vgpr2
DS_WRITE_B32_gfx9 %0, %1, 256, 0, implicit $exec :: (store (s32), addrspace 3)
DS_WRITE_B32_gfx9 %0, %2, 768, 0, implicit $exec :: (store (s32), addrspace 3)
...
---
name: ds_writest64_b64__av64_x2
body: |
bb.0:
liveins: $vgpr0, $vgpr2_vgpr3, $vgpr4_vgpr5
; CHECK-LABEL: name: ds_writest64_b64__av64_x2
; CHECK: liveins: $vgpr0, $vgpr2_vgpr3, $vgpr4_vgpr5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_64_align2 = COPY $vgpr2_vgpr3
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_64_align2 = COPY $vgpr4_vgpr5
; CHECK-NEXT: DS_WRITE2ST64_B64_gfx9 [[COPY]], [[COPY1]], [[COPY2]], 1, 3, 0, implicit $exec :: (store (s64), addrspace 3)
%0:vgpr_32 = COPY $vgpr0
%1:av_64_align2 = COPY $vgpr2_vgpr3
%2:av_64_align2 = COPY $vgpr4_vgpr5
DS_WRITE_B64_gfx9 %0, %1, 512, 0, implicit $exec :: (store (s64), addrspace 3)
DS_WRITE_B64_gfx9 %0, %2, 1536, 0, implicit $exec :: (store (s64), addrspace 3)
...