llvm-project/llvm/test/CodeGen/AMDGPU/merge-image-load.mir
Stanislav Mekhanoshin cefa1c5ca9 [AMDGPU] Fix combined MMO in load-store merge
Loads and stores can be out of order in the SILoadStoreOptimizer.
When combining MachineMemOperands of two instructions operands are
sent in the IR order into the combineKnownAdjacentMMOs. At the
moment it picks the first operand and just replaces its offset and
size. This essentially loses alignment information and may generally
result in an incorrect base pointer to be used.

Use a base pointer in memory addresses order instead and only adjust
size.

Differential Revision: https://reviews.llvm.org/D120370
2022-02-24 10:47:57 -08:00

470 lines
26 KiB
YAML

# RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefix=GFX9 %s
# GFX9-LABEL: name: image_load_merged_v1v3
# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
name: image_load_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
%3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
%5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128))
%6:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sgpr_256, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4)
%7:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sgpr_256, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_load_merged_v1v3_reversed
# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4)
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub3
# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub0_sub1_sub2
name: image_load_merged_v1v3_reversed
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
%3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
%5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128))
%6:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sgpr_256, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4)
%7:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sgpr_256, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_load_merged_v2v2
# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4)
# GFX9: %{{[0-9]+}}:vreg_64 = COPY %8.sub0_sub1
# GFX9: %{{[0-9]+}}:vreg_64 = COPY killed %8.sub2_sub3
name: image_load_merged_v2v2
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
%3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
%5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128))
%6:vreg_64 = IMAGE_LOAD_V2_V4 %5:vreg_128, %3:sgpr_256, 3, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4)
%7:vreg_64 = IMAGE_LOAD_V2_V4 %5:vreg_128, %3:sgpr_256, 12, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_load_merged_v2v2_reversed
# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4)
# GFX9: %{{[0-9]+}}:vreg_64 = COPY %8.sub2_sub3
# GFX9: %{{[0-9]+}}:vreg_64 = COPY killed %8.sub0_sub1
name: image_load_merged_v2v2_reversed
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
%3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
%5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128))
%6:vreg_64 = IMAGE_LOAD_V2_V4 %5:vreg_128, %3:sgpr_256, 12, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4)
%7:vreg_64 = IMAGE_LOAD_V2_V4 %5:vreg_128, %3:sgpr_256, 3, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_load_merged_v3v1
# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4)
# GFX9: %{{[0-9]+}}:vreg_96 = COPY %8.sub0_sub1_sub2
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %8.sub3
name: image_load_merged_v3v1
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
%3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
%5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128))
%6:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sgpr_256, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4)
%7:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sgpr_256, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4)
...
---
# GFX9-LABEL: name: image_load_merged_v3v1_reversed
# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
# GFX9: %{{[0-9]+}}:vreg_96 = COPY %8.sub1_sub2_sub3
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %8.sub0
name: image_load_merged_v3v1_reversed
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
%3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
%5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128))
%6:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sgpr_256, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4)
%7:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sgpr_256, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4)
...
---
# GFX9-LABEL: name: image_load_divided_merged
# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4)
name: image_load_divided_merged
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
%3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
%5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128))
%6:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sgpr_256, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4)
%7:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128))
%8:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128))
%9:vreg_96 = IMAGE_LOAD_V3_V4 %7:vreg_128, %3:sgpr_256, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4)
%10:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128))
%11:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sgpr_256, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_load_divided_not_merged
# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4)
# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4)
name: image_load_divided_not_merged
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
%3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vreg_128 = COPY %2
%5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128))
%6:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sgpr_256, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4)
IMAGE_STORE_V4_V4 %4:vreg_128, %5:vreg_128, %3:sgpr_256, 15, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (store (s128))
%7:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sgpr_256, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_load_dmask_overlapped_not_merged
# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 4, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4)
# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4)
name: image_load_dmask_overlapped_not_merged
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
%3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
%5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128))
%6:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sgpr_256, 4, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4)
%7:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sgpr_256, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_load_dmask_not_disjoint_not_merged
# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 4, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4)
# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 11, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4)
name: image_load_dmask_not_disjoint_not_merged
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
%3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
%5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128))
%6:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sgpr_256, 4, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4)
%7:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sgpr_256, 11, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_load_not_merged_0
# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4)
# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %6, %3, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4)
name: image_load_not_merged_0
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
%3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
%5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128))
%6:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128))
%7:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4)
%8:vreg_96 = IMAGE_LOAD_V3_V4 %6, %3, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_load_not_merged_1
# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %6, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4)
# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %6, %4, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4)
name: image_load_not_merged_1
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
%3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%5:vgpr_32 = COPY %2.sub3
%6:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128))
%7:vgpr_32 = IMAGE_LOAD_V1_V4 %6, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4)
%8:vreg_96 = IMAGE_LOAD_V3_V4 %6, %4, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_load_not_merged_10
# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4)
# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, -1, 1, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4)
name: image_load_not_merged_10
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
%3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
%5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128))
%6:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4)
%7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, -1, 1, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_load_not_merged_3
# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4)
# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 1, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4)
name: image_load_not_merged_3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
%3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
%5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128))
%6:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4)
%7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 1, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_load_not_merged_4
# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4)
# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 1, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4)
name: image_load_not_merged_4
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
%3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
%5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128))
%6:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4)
%7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 1, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_load_not_merged_5
# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4)
# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 1, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4)
name: image_load_not_merged_5
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
%3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
%5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128))
%6:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4)
%7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 1, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_load_not_merged_6
# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4)
# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 1, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4)
name: image_load_not_merged_6
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
%3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
%5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128))
%6:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4)
%7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 1, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_load_not_merged_7
# GFX9: %{{[0-9]+}}:vreg_64 = IMAGE_LOAD_V2_V4 %5, %3, 8, 0, 0, 0, 1, 0, -1, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4)
# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4)
name: image_load_not_merged_7
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
%3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
%5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128))
%6:vreg_64 = IMAGE_LOAD_V2_V4 %5, %3, 8, 0, 0, 0, 1, 0, -1, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4)
%7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_load_not_merged_8
# GFX9: %{{[0-9]+}}:vreg_64 = IMAGE_LOAD_V2_V4 %5, %3, 8, 0, 0, 0, 0, 1, -1, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4)
# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4)
name: image_load_not_merged_8
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
%3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
%5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128))
%6:vreg_64 = IMAGE_LOAD_V2_V4 %5, %3, 8, 0, 0, 0, 0, 1, -1, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4)
%7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_load_not_merged_9
# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4)
# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4)
name: image_load_not_merged_9
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
%3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
%5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128))
%6:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4)
%7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_load_mip_merged_v1v3
# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_MIP_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
name: image_load_mip_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
%3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
%5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128))
%6:vgpr_32 = IMAGE_LOAD_MIP_V1_V4 %5:vreg_128, %3:sgpr_256, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4)
%7:vreg_96 = IMAGE_LOAD_MIP_V3_V4 %5:vreg_128, %3:sgpr_256, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_load_mip_pck_merged_v1v3
# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_MIP_PCK_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
name: image_load_mip_pck_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
%3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
%5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128))
%6:vgpr_32 = IMAGE_LOAD_MIP_PCK_V1_V4 %5:vreg_128, %3:sgpr_256, 1, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load (s32), addrspace 4)
%7:vreg_96 = IMAGE_LOAD_MIP_PCK_V3_V4 %5:vreg_128, %3:sgpr_256, 14, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_load_mip_pck_sgn_merged_v1v3
# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_MIP_PCK_SGN_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
name: image_load_mip_pck_sgn_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
%3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
%5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128))
%6:vgpr_32 = IMAGE_LOAD_MIP_PCK_SGN_V1_V4 %5:vreg_128, %3:sgpr_256, 1, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load (s32), addrspace 4)
%7:vreg_96 = IMAGE_LOAD_MIP_PCK_SGN_V3_V4 %5:vreg_128, %3:sgpr_256, 14, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_load_pck_merged_v1v3
# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_PCK_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
name: image_load_pck_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
%3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
%5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128))
%6:vgpr_32 = IMAGE_LOAD_PCK_V1_V4 %5:vreg_128, %3:sgpr_256, 1, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load (s32), addrspace 4)
%7:vreg_96 = IMAGE_LOAD_PCK_V3_V4 %5:vreg_128, %3:sgpr_256, 14, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4)
...
---
# GFX9-LABEL: name: image_load_pck_sgn_merged_v1v3
# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_PCK_SGN_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
name: image_load_pck_sgn_merged_v1v3
body: |
bb.0.entry:
%0:sgpr_64 = COPY $sgpr0_sgpr1
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
%3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
%4:vgpr_32 = COPY %2.sub3
%5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128))
%6:vgpr_32 = IMAGE_LOAD_PCK_SGN_V1_V4 %5:vreg_128, %3:sgpr_256, 1, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load (s32), addrspace 4)
%7:vreg_96 = IMAGE_LOAD_PCK_SGN_V3_V4 %5:vreg_128, %3:sgpr_256, 14, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4)
...
---