llvm-project/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.noret.ll
Brox Chen 6dbc01e801
[AMDGPU][True16][CodeGen] update GFX11Plus codegen test with true16 flag (#135078)
This is a NFC patch.

This patch run a bulk update on CodeGen tests that are impacted by the
true16 features. This patch applies:
1. duplicate GFX11plus runlines and apply them with
"+mattr=+real-true16" and "+mattr=-real-true16"
2. update the test with the update script

For some GISEL runlines, the current CodeGen do not fully support the
true16 version. Still update the runlines, but comment out the failing
one, and added a "FIXME-TRUE16" comment to that test for easier
tracking. These test will be fixed in the following patches.

This is in a transition state that we support both
"+real-true16/-real-true16" in our code base. We plan to move to
"+real-true16" as default, and finally remove "-real-true16" mode and
test lines.
2025-04-23 13:06:52 -04:00

491 lines
26 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10PLUS-SDAG,GFX10,GFX10-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10PLUS-GISEL,GFX10,GFX10-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10PLUS-SDAG,GFX11,GFX11-SDAG,GFX11-SDAG-TRUE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10PLUS-SDAG,GFX11,GFX11-SDAG,GFX11-SDAG-FAKE16 %s
; FIXME-TRUE16. enable gisel
; XUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10PLUS-GISEL,GFX11,GFX11-GISEL,GFX11-GISEL-TRUE16 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10PLUS-GISEL,GFX11,GFX11-GISEL,GFX11-GISEL-FAKE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG,GFX12-SDAG-TRUE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG,GFX12-SDAG-FAKE16 %s
; XUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL,GFX12-GISEL-TRUE16 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL,GFX12-GISEL-FAKE16 %s
define amdgpu_ps void @sample_1d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; GFX10PLUS-LABEL: sample_1d_nortn:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT: s_endpgm
;
; GFX12-LABEL: sample_1d_nortn:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-NEXT: s_endpgm
main_body:
call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret void
}
define amdgpu_ps void @sample_2d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
; GFX10PLUS-LABEL: sample_2d_nortn:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: image_sample off, v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10PLUS-NEXT: s_endpgm
;
; GFX12-LABEL: sample_2d_nortn:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: image_sample off, [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX12-NEXT: s_endpgm
main_body:
call void @llvm.amdgcn.image.sample.2d.nortn.f32(i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret void
}
define amdgpu_ps void @sample_3d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %r) {
; GFX10PLUS-LABEL: sample_3d_nortn:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: image_sample off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
; GFX10PLUS-NEXT: s_endpgm
;
; GFX12-LABEL: sample_3d_nortn:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: image_sample off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
; GFX12-NEXT: s_endpgm
main_body:
call void @llvm.amdgcn.image.sample.3d.nortn.f32(i32 15, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret void
}
define amdgpu_ps void @sample_cube_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %face) {
; GFX10PLUS-LABEL: sample_cube_nortn:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: image_sample off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_CUBE
; GFX10PLUS-NEXT: s_endpgm
;
; GFX12-LABEL: sample_cube_nortn:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: image_sample off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_CUBE
; GFX12-NEXT: s_endpgm
main_body:
call void @llvm.amdgcn.image.sample.cube.nortn.f32(i32 15, float %s, float %t, float %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret void
}
define amdgpu_ps void @sample_1darray_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %slice) {
; GFX10PLUS-LABEL: sample_1darray_nortn:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: image_sample off, v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY
; GFX10PLUS-NEXT: s_endpgm
;
; GFX12-LABEL: sample_1darray_nortn:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: image_sample off, [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY
; GFX12-NEXT: s_endpgm
main_body:
call void @llvm.amdgcn.image.sample.1darray.nortn.f32(i32 15, float %s, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret void
}
define amdgpu_ps void @sample_2darray_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %slice) {
; GFX10PLUS-LABEL: sample_2darray_nortn:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: image_sample off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY
; GFX10PLUS-NEXT: s_endpgm
;
; GFX12-LABEL: sample_2darray_nortn:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: image_sample off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY
; GFX12-NEXT: s_endpgm
main_body:
call void @llvm.amdgcn.image.sample.2darray.nortn.f32(i32 15, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret void
}
define amdgpu_ps void @sample_b_1d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) {
; GFX10PLUS-LABEL: sample_b_1d_nortn:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: image_sample_b off, v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT: s_endpgm
;
; GFX12-LABEL: sample_b_1d_nortn:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: image_sample_b off, [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-NEXT: s_endpgm
main_body:
call void @llvm.amdgcn.image.sample.b.1d.nortn.f32(i32 15, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret void
}
define amdgpu_ps void @sample_b_2d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) {
; GFX10PLUS-LABEL: sample_b_2d_nortn:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: image_sample_b off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10PLUS-NEXT: s_endpgm
;
; GFX12-LABEL: sample_b_2d_nortn:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: image_sample_b off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX12-NEXT: s_endpgm
main_body:
call void @llvm.amdgcn.image.sample.b.2d.nortn.f32(i32 15, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret void
}
define amdgpu_ps void @sample_c_1d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) {
; GFX10PLUS-LABEL: sample_c_1d_nortn:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: image_sample_c off, v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT: s_endpgm
;
; GFX12-LABEL: sample_c_1d_nortn:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: image_sample_c off, [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-NEXT: s_endpgm
main_body:
call void @llvm.amdgcn.image.sample.c.1d.nortn.f32(i32 15, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret void
}
define amdgpu_ps void @sample_c_2d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) {
; GFX10PLUS-LABEL: sample_c_2d_nortn:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: image_sample_c off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10PLUS-NEXT: s_endpgm
;
; GFX12-LABEL: sample_c_2d_nortn:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: image_sample_c off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX12-NEXT: s_endpgm
main_body:
call void @llvm.amdgcn.image.sample.c.2d.nortn.f32(i32 15, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret void
}
define amdgpu_ps void @sample_d_1d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s) {
; GFX10PLUS-LABEL: sample_d_1d_nortn:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: image_sample_d off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT: s_endpgm
;
; GFX12-LABEL: sample_d_1d_nortn:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: image_sample_d off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-NEXT: s_endpgm
main_body:
call void @llvm.amdgcn.image.sample.d.1d.nortn.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret void
}
define amdgpu_ps void @sample_d_2d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
; GFX10PLUS-LABEL: sample_d_2d_nortn:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: image_sample_d off, v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10PLUS-NEXT: s_endpgm
;
; GFX12-LABEL: sample_d_2d_nortn:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: image_sample_d off, [v0, v1, v2, v[3:5]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX12-NEXT: s_endpgm
main_body:
call void @llvm.amdgcn.image.sample.d.2d.nortn.f32.f32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret void
}
define amdgpu_ps void @sample_l_1d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %lod) {
; GFX10PLUS-LABEL: sample_l_1d_nortn:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: image_sample_l off, v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT: s_endpgm
;
; GFX12-LABEL: sample_l_1d_nortn:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: image_sample_l off, [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-NEXT: s_endpgm
main_body:
call void @llvm.amdgcn.image.sample.l.1d.nortn.f32(i32 15, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret void
}
define amdgpu_ps void @sample_l_2d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) {
; GFX10PLUS-LABEL: sample_l_2d_nortn:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: image_sample_l off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10PLUS-NEXT: s_endpgm
;
; GFX12-LABEL: sample_l_2d_nortn:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: image_sample_l off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX12-NEXT: s_endpgm
main_body:
call void @llvm.amdgcn.image.sample.l.2d.nortn.f32(i32 15, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret void
}
define amdgpu_ps <4 x float> @sample_nortn_mix_1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; GFX10PLUS-LABEL: sample_nortn_mix_1:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_nortn_mix_1:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s12, exec_lo
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
%v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
define amdgpu_ps <4 x float> @sample_nortn_mix_2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; GFX10PLUS-LABEL: sample_nortn_mix_2:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: v_mov_b32_e32 v4, v0
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_nortn_mix_2:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s12, exec_lo
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: v_mov_b32_e32 v4, v0
; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
define amdgpu_ps <4 x float> @sample_nortn_mix_3(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; GFX10PLUS-SDAG-LABEL: sample_nortn_mix_3:
; GFX10PLUS-SDAG: ; %bb.0: ; %main_body
; GFX10PLUS-SDAG-NEXT: s_mov_b32 s12, exec_lo
; GFX10PLUS-SDAG-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-SDAG-NEXT: image_sample v1, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D
; GFX10PLUS-SDAG-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-SDAG-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-SDAG-NEXT: s_waitcnt vmcnt(1)
; GFX10PLUS-SDAG-NEXT: image_sample v[0:3], v1, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-SDAG-NEXT: ; return to shader part epilog
;
; GFX10PLUS-GISEL-LABEL: sample_nortn_mix_3:
; GFX10PLUS-GISEL: ; %bb.0: ; %main_body
; GFX10PLUS-GISEL-NEXT: s_mov_b32 s12, exec_lo
; GFX10PLUS-GISEL-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-GISEL-NEXT: image_sample v[1:4], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-GISEL-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-GISEL-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-GISEL-NEXT: s_waitcnt vmcnt(1)
; GFX10PLUS-GISEL-NEXT: image_sample v[0:3], v1, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-GISEL-NEXT: ; return to shader part epilog
;
; GFX12-SDAG-LABEL: sample_nortn_mix_3:
; GFX12-SDAG: ; %bb.0: ; %main_body
; GFX12-SDAG-NEXT: s_mov_b32 s12, exec_lo
; GFX12-SDAG-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-SDAG-NEXT: image_sample v1, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D
; GFX12-SDAG-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-SDAG-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-SDAG-NEXT: s_wait_samplecnt 0x1
; GFX12-SDAG-NEXT: image_sample v[0:3], v1, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
; GFX12-SDAG-NEXT: ; return to shader part epilog
;
; GFX12-GISEL-LABEL: sample_nortn_mix_3:
; GFX12-GISEL: ; %bb.0: ; %main_body
; GFX12-GISEL-NEXT: s_mov_b32 s12, exec_lo
; GFX12-GISEL-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-GISEL-NEXT: image_sample v[1:4], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-GISEL-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-GISEL-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x1
; GFX12-GISEL-NEXT: image_sample v[0:3], v1, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
; GFX12-GISEL-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
%v.0 = extractelement <4 x float> %v, i32 0
call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
%u = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %v.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %u
}
define amdgpu_ps <4 x float> @sample_nortn_mix_4(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; GFX10PLUS-SDAG-LABEL: sample_nortn_mix_4:
; GFX10PLUS-SDAG: ; %bb.0: ; %main_body
; GFX10PLUS-SDAG-NEXT: s_mov_b32 s12, exec_lo
; GFX10PLUS-SDAG-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-SDAG-NEXT: image_sample v4, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D
; GFX10PLUS-SDAG-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-SDAG-NEXT: s_waitcnt vmcnt(1)
; GFX10PLUS-SDAG-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-SDAG-NEXT: image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-SDAG-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-SDAG-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-SDAG-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-SDAG-NEXT: s_waitcnt vmcnt(2)
; GFX10PLUS-SDAG-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-SDAG-NEXT: ; return to shader part epilog
;
; GFX10PLUS-GISEL-LABEL: sample_nortn_mix_4:
; GFX10PLUS-GISEL: ; %bb.0: ; %main_body
; GFX10PLUS-GISEL-NEXT: s_mov_b32 s12, exec_lo
; GFX10PLUS-GISEL-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-GISEL-NEXT: image_sample v[4:7], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-GISEL-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-GISEL-NEXT: s_waitcnt vmcnt(1)
; GFX10PLUS-GISEL-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-GISEL-NEXT: image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-GISEL-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-GISEL-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-GISEL-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-GISEL-NEXT: s_waitcnt vmcnt(2)
; GFX10PLUS-GISEL-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-GISEL-NEXT: ; return to shader part epilog
;
; GFX12-SDAG-LABEL: sample_nortn_mix_4:
; GFX12-SDAG: ; %bb.0: ; %main_body
; GFX12-SDAG-NEXT: s_mov_b32 s12, exec_lo
; GFX12-SDAG-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-SDAG-NEXT: image_sample v4, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D
; GFX12-SDAG-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-SDAG-NEXT: s_wait_samplecnt 0x1
; GFX12-SDAG-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-SDAG-NEXT: image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-SDAG-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-SDAG-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-SDAG-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-SDAG-NEXT: s_wait_samplecnt 0x2
; GFX12-SDAG-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
; GFX12-SDAG-NEXT: ; return to shader part epilog
;
; GFX12-GISEL-LABEL: sample_nortn_mix_4:
; GFX12-GISEL: ; %bb.0: ; %main_body
; GFX12-GISEL-NEXT: s_mov_b32 s12, exec_lo
; GFX12-GISEL-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-GISEL-NEXT: image_sample v[4:7], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-GISEL-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x1
; GFX12-GISEL-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-GISEL-NEXT: image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-GISEL-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-GISEL-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-GISEL-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x2
; GFX12-GISEL-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
; GFX12-GISEL-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
%v.0 = extractelement <4 x float> %v, i32 0
%v.1 = extractelement <4 x float> %v, i32 0
%v.2 = extractelement <4 x float> %v, i32 0
%v.3 = extractelement <4 x float> %v, i32 0
call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %v.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
%u = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %v.1, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
%u.0 = extractelement <4 x float> %u, i32 0
call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %v.2, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %v.3, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %u.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %u
}
define amdgpu_ps void @sample_d_1d_g16_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {
; GFX10PLUS-LABEL: sample_d_1d_g16_nortn:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: image_sample_d_g16 off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT: s_endpgm
;
; GFX12-LABEL: sample_d_1d_g16_nortn:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: image_sample_d_g16 off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-NEXT: s_endpgm
main_body:
call void @llvm.amdgcn.image.sample.d.1d.nortn.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret void
}
declare void @llvm.amdgcn.image.sample.1d.nortn.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
declare void @llvm.amdgcn.image.sample.2d.nortn.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
declare void @llvm.amdgcn.image.sample.3d.nortn.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
declare void @llvm.amdgcn.image.sample.cube.nortn.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
declare void @llvm.amdgcn.image.sample.1darray.nortn.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
declare void @llvm.amdgcn.image.sample.2darray.nortn.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
declare void @llvm.amdgcn.image.sample.b.1d.nortn.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
declare void @llvm.amdgcn.image.sample.b.2d.nortn.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
declare void @llvm.amdgcn.image.sample.c.1d.nortn.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
declare void @llvm.amdgcn.image.sample.c.2d.nortn.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
declare void @llvm.amdgcn.image.sample.d.1d.f32.nortn.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
declare void @llvm.amdgcn.image.sample.d.2d.f32.nortn.f32(i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
declare void @llvm.amdgcn.image.sample.l.1d.nortn.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
declare void @llvm.amdgcn.image.sample.l.2d.nortn.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare void @llvm.amdgcn.image.sample.d.1d.nortn.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX10: {{.*}}
; GFX10-GISEL: {{.*}}
; GFX10-SDAG: {{.*}}
; GFX11: {{.*}}
; GFX11-GISEL: {{.*}}
; GFX11-GISEL-FAKE16: {{.*}}
; GFX11-SDAG: {{.*}}
; GFX11-SDAG-FAKE16: {{.*}}
; GFX11-SDAG-TRUE16: {{.*}}
; GFX12-GISEL-FAKE16: {{.*}}
; GFX12-SDAG-FAKE16: {{.*}}
; GFX12-SDAG-TRUE16: {{.*}}