Regbanklegalize rules for INTRIN_IMAGE loads and stores. Because of the very large number of different type signatures, the rule specifies only the function used for lowering (which performs waterfall lowering of the RsrcIdx operand if needed), and this function also applies the register banks.
1026 lines
58 KiB
LLVM
1026 lines
58 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10 %s
|
|
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -global-isel -new-reg-bank-select < %s | FileCheck -check-prefixes=GFX10GISEL %s
|
|
; TODO: global-isel produces more code than SelectionDAG here - some more combines will be needed in the postregbankselectcombine phase
|
|
; Depends on some other changes to pass this test - those are in review separately
|
|
|
|
; sample_d_1d: calls llvm.amdgcn.image.sample.d.1d with float derivatives
; (%dsdh, %dsdv) and a half coordinate (%s), dmask 15, returning <4 x float>.
; Both prefixes expect a single image_sample_d carrying the a16 modifier. The
; GFX10GISEL checks additionally expect a v_and_b32/v_lshl_or_b32 pair on v2
; (with s0 as the shifted operand) ahead of the sample.
define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, half %s) {
|
|
; GFX10-LABEL: sample_d_1d:
|
|
; GFX10: ; %bb.0: ; %main_body
|
|
; GFX10-NEXT: image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10GISEL-LABEL: sample_d_1d:
|
|
; GFX10GISEL: ; %bb.0: ; %main_body
|
|
; GFX10GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
|
|
; GFX10GISEL-NEXT: v_lshl_or_b32 v2, s0, 16, v2
|
|
; GFX10GISEL-NEXT: image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
|
|
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10GISEL-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f16(i32 15, float %dsdh, float %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; sample_d_2d: calls llvm.amdgcn.image.sample.d.2d with four float derivatives
; and two half coordinates (%s, %t), dmask 15, returning <4 x float>.
; The GFX10 checks expect one v_perm_b32 (v5, v4 -> v4) before image_sample_d;
; the GFX10GISEL checks expect v_and_b32 + v_lshl_or_b32 on v4/v5 instead.
; Both sample from v[0:4] with the a16 modifier.
define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t) {
|
|
; GFX10-LABEL: sample_d_2d:
|
|
; GFX10: ; %bb.0: ; %main_body
|
|
; GFX10-NEXT: v_perm_b32 v4, v5, v4, 0x5040100
|
|
; GFX10-NEXT: image_sample_d v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10GISEL-LABEL: sample_d_2d:
|
|
; GFX10GISEL: ; %bb.0: ; %main_body
|
|
; GFX10GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
|
|
; GFX10GISEL-NEXT: v_lshl_or_b32 v4, v5, 16, v4
|
|
; GFX10GISEL-NEXT: image_sample_d v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
|
|
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10GISEL-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f16(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; sample_d_3d: calls llvm.amdgcn.image.sample.d.3d with six float derivatives
; and three half coordinates (%s, %t, %r), dmask 15, returning <4 x float>.
; The GFX10 checks expect seven v_mov_b32 copies plus one v_perm_b32 that build
; the address in v[8:15]; the GFX10GISEL checks expect two v_and_b32 and two
; v_lshl_or_b32 and sample directly from v[0:7]. Both use the a16 modifier.
define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %drdh, float %dsdv, float %dtdv, float %drdv, half %s, half %t, half %r) {
|
|
; GFX10-LABEL: sample_d_3d:
|
|
; GFX10: ; %bb.0: ; %main_body
|
|
; GFX10-NEXT: v_mov_b32_e32 v15, v8
|
|
; GFX10-NEXT: v_mov_b32_e32 v13, v5
|
|
; GFX10-NEXT: v_mov_b32_e32 v12, v4
|
|
; GFX10-NEXT: v_mov_b32_e32 v11, v3
|
|
; GFX10-NEXT: v_mov_b32_e32 v10, v2
|
|
; GFX10-NEXT: v_mov_b32_e32 v9, v1
|
|
; GFX10-NEXT: v_mov_b32_e32 v8, v0
|
|
; GFX10-NEXT: v_perm_b32 v14, v7, v6, 0x5040100
|
|
; GFX10-NEXT: image_sample_d v[0:3], v[8:15], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10GISEL-LABEL: sample_d_3d:
|
|
; GFX10GISEL: ; %bb.0: ; %main_body
|
|
; GFX10GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v6
|
|
; GFX10GISEL-NEXT: v_and_b32_e32 v8, 0xffff, v8
|
|
; GFX10GISEL-NEXT: v_lshl_or_b32 v6, v7, 16, v6
|
|
; GFX10GISEL-NEXT: v_lshl_or_b32 v7, s0, 16, v8
|
|
; GFX10GISEL-NEXT: image_sample_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16
|
|
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10GISEL-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f16(i32 15, float %dsdh, float %dtdh, float %drdh, float %dsdv, float %dtdv, float %drdv, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; sample_c_d_1d: calls llvm.amdgcn.image.sample.c.d.1d with a float %zcompare,
; float derivatives (%dsdh, %dsdv) and a half coordinate (%s), dmask 15.
; Both prefixes expect image_sample_c_d on v[0:3] with the a16 modifier; the
; GFX10GISEL checks additionally expect v_and_b32/v_lshl_or_b32 on v3 (with s0
; as the shifted operand) ahead of the sample.
define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, half %s) {
|
|
; GFX10-LABEL: sample_c_d_1d:
|
|
; GFX10: ; %bb.0: ; %main_body
|
|
; GFX10-NEXT: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10GISEL-LABEL: sample_c_d_1d:
|
|
; GFX10GISEL: ; %bb.0: ; %main_body
|
|
; GFX10GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
|
|
; GFX10GISEL-NEXT: v_lshl_or_b32 v3, s0, 16, v3
|
|
; GFX10GISEL-NEXT: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
|
|
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10GISEL-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; sample_c_d_2d: calls llvm.amdgcn.image.sample.c.d.2d with a float %zcompare,
; four float derivatives and two half coordinates (%s, %t), dmask 15.
; The GFX10 checks expect one v_perm_b32 (v6, v5 -> v5); the GFX10GISEL checks
; expect v_and_b32 + v_lshl_or_b32 on v5/v6 instead. Both then expect
; image_sample_c_d on v[0:5] with the a16 modifier.
define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t) {
|
|
; GFX10-LABEL: sample_c_d_2d:
|
|
; GFX10: ; %bb.0: ; %main_body
|
|
; GFX10-NEXT: v_perm_b32 v5, v6, v5, 0x5040100
|
|
; GFX10-NEXT: image_sample_c_d v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10GISEL-LABEL: sample_c_d_2d:
|
|
; GFX10GISEL: ; %bb.0: ; %main_body
|
|
; GFX10GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5
|
|
; GFX10GISEL-NEXT: v_lshl_or_b32 v5, v6, 16, v5
|
|
; GFX10GISEL-NEXT: image_sample_c_d v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
|
|
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10GISEL-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; sample_d_cl_1d: calls llvm.amdgcn.image.sample.d.cl.1d with float derivatives
; (%dsdh, %dsdv) and two half operands (%s, %clamp), dmask 15.
; The GFX10 checks expect one v_perm_b32 (v3, v2 -> v2); the GFX10GISEL checks
; expect v_and_b32 + v_lshl_or_b32 on v2/v3 instead. Both then expect
; image_sample_d_cl on v[0:2] with the a16 modifier.
define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, half %s, half %clamp) {
|
|
; GFX10-LABEL: sample_d_cl_1d:
|
|
; GFX10: ; %bb.0: ; %main_body
|
|
; GFX10-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
|
|
; GFX10-NEXT: image_sample_d_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10GISEL-LABEL: sample_d_cl_1d:
|
|
; GFX10GISEL: ; %bb.0: ; %main_body
|
|
; GFX10GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
|
|
; GFX10GISEL-NEXT: v_lshl_or_b32 v2, v3, 16, v2
|
|
; GFX10GISEL-NEXT: image_sample_d_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
|
|
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10GISEL-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f16(i32 15, float %dsdh, float %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; sample_d_cl_2d: calls llvm.amdgcn.image.sample.d.cl.2d with four float
; derivatives and three half operands (%s, %t, %clamp), dmask 15.
; The GFX10 checks expect five v_mov_b32 copies plus one v_perm_b32 that build
; the address in v[6:11]; the GFX10GISEL checks expect two v_and_b32 and two
; v_lshl_or_b32 and sample from v[0:5]. Both use the a16 modifier.
define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp) {
|
|
; GFX10-LABEL: sample_d_cl_2d:
|
|
; GFX10: ; %bb.0: ; %main_body
|
|
; GFX10-NEXT: v_mov_b32_e32 v11, v6
|
|
; GFX10-NEXT: v_mov_b32_e32 v9, v3
|
|
; GFX10-NEXT: v_mov_b32_e32 v8, v2
|
|
; GFX10-NEXT: v_mov_b32_e32 v7, v1
|
|
; GFX10-NEXT: v_mov_b32_e32 v6, v0
|
|
; GFX10-NEXT: v_perm_b32 v10, v5, v4, 0x5040100
|
|
; GFX10-NEXT: image_sample_d_cl v[0:3], v[6:11], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10GISEL-LABEL: sample_d_cl_2d:
|
|
; GFX10GISEL: ; %bb.0: ; %main_body
|
|
; GFX10GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
|
|
; GFX10GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v6
|
|
; GFX10GISEL-NEXT: v_lshl_or_b32 v4, v5, 16, v4
|
|
; GFX10GISEL-NEXT: v_lshl_or_b32 v5, s0, 16, v6
|
|
; GFX10GISEL-NEXT: image_sample_d_cl v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
|
|
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10GISEL-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f16(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; sample_c_d_cl_1d: calls llvm.amdgcn.image.sample.c.d.cl.1d with a float
; %zcompare, float derivatives and two half operands (%s, %clamp), dmask 15.
; The GFX10 checks expect one v_perm_b32 (v4, v3 -> v3); the GFX10GISEL checks
; expect v_and_b32 + v_lshl_or_b32 on v3/v4 instead. Both then expect
; image_sample_c_d_cl on v[0:3] with the a16 modifier.
define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, half %s, half %clamp) {
|
|
; GFX10-LABEL: sample_c_d_cl_1d:
|
|
; GFX10: ; %bb.0: ; %main_body
|
|
; GFX10-NEXT: v_perm_b32 v3, v4, v3, 0x5040100
|
|
; GFX10-NEXT: image_sample_c_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10GISEL-LABEL: sample_c_d_cl_1d:
|
|
; GFX10GISEL: ; %bb.0: ; %main_body
|
|
; GFX10GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
|
|
; GFX10GISEL-NEXT: v_lshl_or_b32 v3, v4, 16, v3
|
|
; GFX10GISEL-NEXT: image_sample_c_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
|
|
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10GISEL-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; sample_c_d_cl_2d: calls llvm.amdgcn.image.sample.c.d.cl.2d with a float
; %zcompare, four float derivatives and three half operands (%s, %t, %clamp).
; The GFX10 checks expect six v_mov_b32 copies plus one v_perm_b32 that build
; the address in v[7:13]; the GFX10GISEL checks expect two v_and_b32 and two
; v_lshl_or_b32 and sample from v[0:6]. Both use the a16 modifier.
define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp) {
|
|
; GFX10-LABEL: sample_c_d_cl_2d:
|
|
; GFX10: ; %bb.0: ; %main_body
|
|
; GFX10-NEXT: v_mov_b32_e32 v13, v7
|
|
; GFX10-NEXT: v_mov_b32_e32 v11, v4
|
|
; GFX10-NEXT: v_mov_b32_e32 v10, v3
|
|
; GFX10-NEXT: v_mov_b32_e32 v9, v2
|
|
; GFX10-NEXT: v_mov_b32_e32 v8, v1
|
|
; GFX10-NEXT: v_mov_b32_e32 v7, v0
|
|
; GFX10-NEXT: v_perm_b32 v12, v6, v5, 0x5040100
|
|
; GFX10-NEXT: image_sample_c_d_cl v[0:3], v[7:13], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10GISEL-LABEL: sample_c_d_cl_2d:
|
|
; GFX10GISEL: ; %bb.0: ; %main_body
|
|
; GFX10GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5
|
|
; GFX10GISEL-NEXT: v_and_b32_e32 v7, 0xffff, v7
|
|
; GFX10GISEL-NEXT: v_lshl_or_b32 v5, v6, 16, v5
|
|
; GFX10GISEL-NEXT: v_lshl_or_b32 v6, s0, 16, v7
|
|
; GFX10GISEL-NEXT: image_sample_c_d_cl v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
|
|
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10GISEL-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; sample_cd_1d: calls llvm.amdgcn.image.sample.cd.1d with float derivatives
; (%dsdh, %dsdv) and a half coordinate (%s), dmask 15, returning <4 x float>.
; Both prefixes expect a single image_sample_cd on v[0:2] with the a16
; modifier; the GFX10GISEL checks additionally expect v_and_b32/v_lshl_or_b32
; on v2 (with s0 as the shifted operand) ahead of the sample.
define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, half %s) {
|
|
; GFX10-LABEL: sample_cd_1d:
|
|
; GFX10: ; %bb.0: ; %main_body
|
|
; GFX10-NEXT: image_sample_cd v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10GISEL-LABEL: sample_cd_1d:
|
|
; GFX10GISEL: ; %bb.0: ; %main_body
|
|
; GFX10GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
|
|
; GFX10GISEL-NEXT: v_lshl_or_b32 v2, s0, 16, v2
|
|
; GFX10GISEL-NEXT: image_sample_cd v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
|
|
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10GISEL-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f16(i32 15, float %dsdh, float %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; sample_cd_2d: calls llvm.amdgcn.image.sample.cd.2d with four float
; derivatives and two half coordinates (%s, %t), dmask 15.
; The GFX10 checks expect one v_perm_b32 (v5, v4 -> v4); the GFX10GISEL checks
; expect v_and_b32 + v_lshl_or_b32 on v4/v5 instead. Both then expect
; image_sample_cd on v[0:4] with the a16 modifier.
define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t) {
|
|
; GFX10-LABEL: sample_cd_2d:
|
|
; GFX10: ; %bb.0: ; %main_body
|
|
; GFX10-NEXT: v_perm_b32 v4, v5, v4, 0x5040100
|
|
; GFX10-NEXT: image_sample_cd v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10GISEL-LABEL: sample_cd_2d:
|
|
; GFX10GISEL: ; %bb.0: ; %main_body
|
|
; GFX10GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
|
|
; GFX10GISEL-NEXT: v_lshl_or_b32 v4, v5, 16, v4
|
|
; GFX10GISEL-NEXT: image_sample_cd v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
|
|
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10GISEL-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f16(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; sample_c_cd_1d: calls llvm.amdgcn.image.sample.c.cd.1d with a float
; %zcompare, float derivatives (%dsdh, %dsdv) and a half coordinate (%s).
; Both prefixes expect image_sample_c_cd on v[0:3] with the a16 modifier; the
; GFX10GISEL checks additionally expect v_and_b32/v_lshl_or_b32 on v3 (with s0
; as the shifted operand) ahead of the sample.
define amdgpu_ps <4 x float> @sample_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, half %s) {
|
|
; GFX10-LABEL: sample_c_cd_1d:
|
|
; GFX10: ; %bb.0: ; %main_body
|
|
; GFX10-NEXT: image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10GISEL-LABEL: sample_c_cd_1d:
|
|
; GFX10GISEL: ; %bb.0: ; %main_body
|
|
; GFX10GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
|
|
; GFX10GISEL-NEXT: v_lshl_or_b32 v3, s0, 16, v3
|
|
; GFX10GISEL-NEXT: image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
|
|
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10GISEL-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; sample_c_cd_2d: calls llvm.amdgcn.image.sample.c.cd.2d with a float
; %zcompare, four float derivatives and two half coordinates (%s, %t).
; The GFX10 checks expect one v_perm_b32 (v6, v5 -> v5); the GFX10GISEL checks
; expect v_and_b32 + v_lshl_or_b32 on v5/v6 instead. Both then expect
; image_sample_c_cd on v[0:5] with the a16 modifier.
define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t) {
|
|
; GFX10-LABEL: sample_c_cd_2d:
|
|
; GFX10: ; %bb.0: ; %main_body
|
|
; GFX10-NEXT: v_perm_b32 v5, v6, v5, 0x5040100
|
|
; GFX10-NEXT: image_sample_c_cd v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10GISEL-LABEL: sample_c_cd_2d:
|
|
; GFX10GISEL: ; %bb.0: ; %main_body
|
|
; GFX10GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5
|
|
; GFX10GISEL-NEXT: v_lshl_or_b32 v5, v6, 16, v5
|
|
; GFX10GISEL-NEXT: image_sample_c_cd v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
|
|
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10GISEL-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; sample_cd_cl_1d: calls llvm.amdgcn.image.sample.cd.cl.1d with float
; derivatives (%dsdh, %dsdv) and two half operands (%s, %clamp), dmask 15.
; The GFX10 checks expect one v_perm_b32 (v3, v2 -> v2); the GFX10GISEL checks
; expect v_and_b32 + v_lshl_or_b32 on v2/v3 instead. Both then expect
; image_sample_cd_cl on v[0:2] with the a16 modifier.
define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, half %s, half %clamp) {
|
|
; GFX10-LABEL: sample_cd_cl_1d:
|
|
; GFX10: ; %bb.0: ; %main_body
|
|
; GFX10-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
|
|
; GFX10-NEXT: image_sample_cd_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10GISEL-LABEL: sample_cd_cl_1d:
|
|
; GFX10GISEL: ; %bb.0: ; %main_body
|
|
; GFX10GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
|
|
; GFX10GISEL-NEXT: v_lshl_or_b32 v2, v3, 16, v2
|
|
; GFX10GISEL-NEXT: image_sample_cd_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
|
|
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10GISEL-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f16(i32 15, float %dsdh, float %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; sample_cd_cl_2d: calls llvm.amdgcn.image.sample.cd.cl.2d with four float
; derivatives and three half operands (%s, %t, %clamp), dmask 15.
; The GFX10 checks expect five v_mov_b32 copies plus one v_perm_b32 that build
; the address in v[6:11]; the GFX10GISEL checks expect two v_and_b32 and two
; v_lshl_or_b32 and sample from v[0:5]. Both use the a16 modifier.
define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp) {
|
|
; GFX10-LABEL: sample_cd_cl_2d:
|
|
; GFX10: ; %bb.0: ; %main_body
|
|
; GFX10-NEXT: v_mov_b32_e32 v11, v6
|
|
; GFX10-NEXT: v_mov_b32_e32 v9, v3
|
|
; GFX10-NEXT: v_mov_b32_e32 v8, v2
|
|
; GFX10-NEXT: v_mov_b32_e32 v7, v1
|
|
; GFX10-NEXT: v_mov_b32_e32 v6, v0
|
|
; GFX10-NEXT: v_perm_b32 v10, v5, v4, 0x5040100
|
|
; GFX10-NEXT: image_sample_cd_cl v[0:3], v[6:11], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10GISEL-LABEL: sample_cd_cl_2d:
|
|
; GFX10GISEL: ; %bb.0: ; %main_body
|
|
; GFX10GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
|
|
; GFX10GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v6
|
|
; GFX10GISEL-NEXT: v_lshl_or_b32 v4, v5, 16, v4
|
|
; GFX10GISEL-NEXT: v_lshl_or_b32 v5, s0, 16, v6
|
|
; GFX10GISEL-NEXT: image_sample_cd_cl v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
|
|
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10GISEL-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f16(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; sample_c_cd_cl_1d: calls llvm.amdgcn.image.sample.c.cd.cl.1d with a float
; %zcompare, float derivatives and two half operands (%s, %clamp), dmask 15.
; The GFX10 checks expect one v_perm_b32 (v4, v3 -> v3); the GFX10GISEL checks
; expect v_and_b32 + v_lshl_or_b32 on v3/v4 instead. Both then expect
; image_sample_c_cd_cl on v[0:3] with the a16 modifier.
define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, half %s, half %clamp) {
|
|
; GFX10-LABEL: sample_c_cd_cl_1d:
|
|
; GFX10: ; %bb.0: ; %main_body
|
|
; GFX10-NEXT: v_perm_b32 v3, v4, v3, 0x5040100
|
|
; GFX10-NEXT: image_sample_c_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10GISEL-LABEL: sample_c_cd_cl_1d:
|
|
; GFX10GISEL: ; %bb.0: ; %main_body
|
|
; GFX10GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
|
|
; GFX10GISEL-NEXT: v_lshl_or_b32 v3, v4, 16, v3
|
|
; GFX10GISEL-NEXT: image_sample_c_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
|
|
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10GISEL-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; sample_c_cd_cl_2d: calls llvm.amdgcn.image.sample.c.cd.cl.2d with a float
; %zcompare, four float derivatives and three half operands (%s, %t, %clamp).
; The GFX10 checks expect six v_mov_b32 copies plus one v_perm_b32 that build
; the address in v[7:13]; the GFX10GISEL checks expect two v_and_b32 and two
; v_lshl_or_b32 and sample from v[0:6]. Both use the a16 modifier.
define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp) {
|
|
; GFX10-LABEL: sample_c_cd_cl_2d:
|
|
; GFX10: ; %bb.0: ; %main_body
|
|
; GFX10-NEXT: v_mov_b32_e32 v13, v7
|
|
; GFX10-NEXT: v_mov_b32_e32 v11, v4
|
|
; GFX10-NEXT: v_mov_b32_e32 v10, v3
|
|
; GFX10-NEXT: v_mov_b32_e32 v9, v2
|
|
; GFX10-NEXT: v_mov_b32_e32 v8, v1
|
|
; GFX10-NEXT: v_mov_b32_e32 v7, v0
|
|
; GFX10-NEXT: v_perm_b32 v12, v6, v5, 0x5040100
|
|
; GFX10-NEXT: image_sample_c_cd_cl v[0:3], v[7:13], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10GISEL-LABEL: sample_c_cd_cl_2d:
|
|
; GFX10GISEL: ; %bb.0: ; %main_body
|
|
; GFX10GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5
|
|
; GFX10GISEL-NEXT: v_and_b32_e32 v7, 0xffff, v7
|
|
; GFX10GISEL-NEXT: v_lshl_or_b32 v5, v6, 16, v5
|
|
; GFX10GISEL-NEXT: v_lshl_or_b32 v6, s0, 16, v7
|
|
; GFX10GISEL-NEXT: image_sample_c_cd_cl v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
|
|
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10GISEL-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; sample_c_d_o_2darray_V1: calls llvm.amdgcn.image.sample.c.d.o.2darray with
; dmask 4 and a scalar float result; operands are an i32 %offset, a float
; %zcompare, four float derivatives and three half values (%s, %t, %slice).
; Both prefixes expect image_sample_c_d_o writing only v0 with dmask:0x4 and
; the a16 modifier. The GFX10 checks build the address in v[8:15] (seven
; v_mov_b32 plus one v_perm_b32); the GFX10GISEL checks use v[0:7] after two
; v_and_b32 and two v_lshl_or_b32.
define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %slice) {
|
|
; GFX10-LABEL: sample_c_d_o_2darray_V1:
|
|
; GFX10: ; %bb.0: ; %main_body
|
|
; GFX10-NEXT: v_mov_b32_e32 v15, v8
|
|
; GFX10-NEXT: v_mov_b32_e32 v13, v5
|
|
; GFX10-NEXT: v_mov_b32_e32 v12, v4
|
|
; GFX10-NEXT: v_mov_b32_e32 v11, v3
|
|
; GFX10-NEXT: v_mov_b32_e32 v10, v2
|
|
; GFX10-NEXT: v_mov_b32_e32 v9, v1
|
|
; GFX10-NEXT: v_mov_b32_e32 v8, v0
|
|
; GFX10-NEXT: v_perm_b32 v14, v7, v6, 0x5040100
|
|
; GFX10-NEXT: image_sample_c_d_o v0, v[8:15], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10GISEL-LABEL: sample_c_d_o_2darray_V1:
|
|
; GFX10GISEL: ; %bb.0: ; %main_body
|
|
; GFX10GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v6
|
|
; GFX10GISEL-NEXT: v_and_b32_e32 v8, 0xffff, v8
|
|
; GFX10GISEL-NEXT: v_lshl_or_b32 v6, v7, 16, v6
|
|
; GFX10GISEL-NEXT: v_lshl_or_b32 v7, s0, 16, v8
|
|
; GFX10GISEL-NEXT: image_sample_c_d_o v0, v[0:7], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16
|
|
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10GISEL-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f16(i32 4, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret float %v
|
|
}
|
|
|
|
; sample_c_d_o_2darray_V2: same operands as the V1 variant of this test, but
; the call uses dmask 6 and returns <2 x float>.
; Both prefixes expect image_sample_c_d_o writing v[0:1] with dmask:0x6 and
; the a16 modifier. The GFX10 checks build the address in v[8:15] (seven
; v_mov_b32 plus one v_perm_b32); the GFX10GISEL checks use v[0:7] after two
; v_and_b32 and two v_lshl_or_b32.
define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %slice) {
|
|
; GFX10-LABEL: sample_c_d_o_2darray_V2:
|
|
; GFX10: ; %bb.0: ; %main_body
|
|
; GFX10-NEXT: v_mov_b32_e32 v15, v8
|
|
; GFX10-NEXT: v_mov_b32_e32 v13, v5
|
|
; GFX10-NEXT: v_mov_b32_e32 v12, v4
|
|
; GFX10-NEXT: v_mov_b32_e32 v11, v3
|
|
; GFX10-NEXT: v_mov_b32_e32 v10, v2
|
|
; GFX10-NEXT: v_mov_b32_e32 v9, v1
|
|
; GFX10-NEXT: v_mov_b32_e32 v8, v0
|
|
; GFX10-NEXT: v_perm_b32 v14, v7, v6, 0x5040100
|
|
; GFX10-NEXT: image_sample_c_d_o v[0:1], v[8:15], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY a16
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10GISEL-LABEL: sample_c_d_o_2darray_V2:
|
|
; GFX10GISEL: ; %bb.0: ; %main_body
|
|
; GFX10GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v6
|
|
; GFX10GISEL-NEXT: v_and_b32_e32 v8, 0xffff, v8
|
|
; GFX10GISEL-NEXT: v_lshl_or_b32 v6, v7, 16, v6
|
|
; GFX10GISEL-NEXT: v_lshl_or_b32 v7, s0, 16, v8
|
|
; GFX10GISEL-NEXT: image_sample_c_d_o v[0:1], v[0:7], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY a16
|
|
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10GISEL-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32 6, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <2 x float> %v
|
|
}
|
|
|
|
declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f16(i32, float, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f16(i32, float, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f16(i32, float, float, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32, float, float, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32, float, float, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f16(i32, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f16(i32, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32, float, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
|
|
declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f16(i32, float, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f16(i32, float, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32, float, float, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32, float, float, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f16(i32, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f16(i32, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32, float, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
|
|
declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f16(i32, i32, float, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32, i32, float, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
|
|
; sample_g16_noa16_d_1d: calls llvm.amdgcn.image.sample.d.1d with half
; derivatives (%dsdh, %dsdv) and a float coordinate (%s), dmask 15.
; Both prefixes expect image_sample_d_g16 on v[0:2] with no a16 modifier; the
; GFX10GISEL checks additionally expect v_and_b32/v_lshl_or_b32 on v0 and v1
; (each with s0 as the shifted operand) ahead of the sample.
define amdgpu_ps <4 x float> @sample_g16_noa16_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {
|
|
; GFX10-LABEL: sample_g16_noa16_d_1d:
|
|
; GFX10: ; %bb.0: ; %main_body
|
|
; GFX10-NEXT: image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10GISEL-LABEL: sample_g16_noa16_d_1d:
|
|
; GFX10GISEL: ; %bb.0: ; %main_body
|
|
; GFX10GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; GFX10GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
|
|
; GFX10GISEL-NEXT: v_lshl_or_b32 v0, s0, 16, v0
|
|
; GFX10GISEL-NEXT: v_lshl_or_b32 v1, s0, 16, v1
|
|
; GFX10GISEL-NEXT: image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
|
|
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10GISEL-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; sample_g16_noa16_d_2d: calls llvm.amdgcn.image.sample.d.2d with four half
; derivatives and two float coordinates (%s, %t), dmask 15.
; Both prefixes expect image_sample_d_g16 with no a16 modifier and with the
; address given as a bracketed register list rather than a contiguous range
; (NOTE(review): presumably the non-sequential-address encoding - confirm).
; The GFX10 checks expect two v_perm_b32 and the list [v0, v2, v4, v5]; the
; GFX10GISEL checks expect two v_and_b32 and two v_lshl_or_b32 and the list
; [v0, v1, v4, v5].
define amdgpu_ps <4 x float> @sample_g16_noa16_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
|
|
; GFX10-LABEL: sample_g16_noa16_d_2d:
|
|
; GFX10: ; %bb.0: ; %main_body
|
|
; GFX10-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
|
|
; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
|
|
; GFX10-NEXT: image_sample_d_g16 v[0:3], [v0, v2, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10GISEL-LABEL: sample_g16_noa16_d_2d:
|
|
; GFX10GISEL: ; %bb.0: ; %main_body
|
|
; GFX10GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; GFX10GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
|
|
; GFX10GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
|
|
; GFX10GISEL-NEXT: v_lshl_or_b32 v1, v3, 16, v2
|
|
; GFX10GISEL-NEXT: image_sample_d_g16 v[0:3], [v0, v1, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
|
|
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10GISEL-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; sample_g16_noa16_d_3d: calls llvm.amdgcn.image.sample.d.3d with six half
; derivatives and three float coordinates (%s, %t, %r), dmask 15.
; Both prefixes expect image_sample_d_g16 on v[2:8] with no a16 modifier. The
; GFX10 checks expect two v_mov_b32 and two v_perm_b32 before the sample; the
; GFX10GISEL checks expect four v_and_b32 and four v_lshl_or_b32 (two of them
; with s0 as the shifted operand).
define amdgpu_ps <4 x float> @sample_g16_noa16_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r) {
|
|
; GFX10-LABEL: sample_g16_noa16_d_3d:
|
|
; GFX10: ; %bb.0: ; %main_body
|
|
; GFX10-NEXT: v_mov_b32_e32 v9, v3
|
|
; GFX10-NEXT: v_mov_b32_e32 v3, v2
|
|
; GFX10-NEXT: v_perm_b32 v2, v1, v0, 0x5040100
|
|
; GFX10-NEXT: v_perm_b32 v4, v4, v9, 0x5040100
|
|
; GFX10-NEXT: image_sample_d_g16 v[0:3], v[2:8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10GISEL-LABEL: sample_g16_noa16_d_3d:
|
|
; GFX10GISEL: ; %bb.0: ; %main_body
|
|
; GFX10GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; GFX10GISEL-NEXT: v_and_b32_e32 v9, 0xffff, v2
|
|
; GFX10GISEL-NEXT: v_and_b32_e32 v10, 0xffff, v3
|
|
; GFX10GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5
|
|
; GFX10GISEL-NEXT: v_lshl_or_b32 v2, v1, 16, v0
|
|
; GFX10GISEL-NEXT: v_lshl_or_b32 v3, s0, 16, v9
|
|
; GFX10GISEL-NEXT: v_lshl_or_b32 v4, v4, 16, v10
|
|
; GFX10GISEL-NEXT: v_lshl_or_b32 v5, s0, 16, v5
|
|
; GFX10GISEL-NEXT: image_sample_d_g16 v[0:3], v[2:8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
|
|
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10GISEL-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; sample_g16_noa16_c_d_1d: calls llvm.amdgcn.image.sample.c.d.1d with a float
; %zcompare, half derivatives (%dsdh, %dsdv) and a float coordinate (%s).
; Both prefixes expect image_sample_c_d_g16 on v[0:3] with no a16 modifier; the
; GFX10GISEL checks additionally expect v_and_b32/v_lshl_or_b32 on v1 and v2
; (each with s0 as the shifted operand) ahead of the sample.
define amdgpu_ps <4 x float> @sample_g16_noa16_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) {
|
|
; GFX10-LABEL: sample_g16_noa16_c_d_1d:
|
|
; GFX10: ; %bb.0: ; %main_body
|
|
; GFX10-NEXT: image_sample_c_d_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10GISEL-LABEL: sample_g16_noa16_c_d_1d:
|
|
; GFX10GISEL: ; %bb.0: ; %main_body
|
|
; GFX10GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
|
|
; GFX10GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
|
|
; GFX10GISEL-NEXT: v_lshl_or_b32 v1, s0, 16, v1
|
|
; GFX10GISEL-NEXT: v_lshl_or_b32 v2, s0, 16, v2
|
|
; GFX10GISEL-NEXT: image_sample_c_d_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
|
|
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10GISEL-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; 2D image.sample.c.d: float zcompare, four half derivatives, two float
; coordinates. Expected opcode is image_sample_c_d_g16 with no a16 modifier.
define amdgpu_ps <4 x float> @sample_g16_noa16_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
; GFX10-LABEL: sample_g16_noa16_c_d_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_perm_b32 v3, v4, v3, 0x5040100
; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
; GFX10-NEXT: image_sample_c_d_g16 v[0:3], [v0, v1, v3, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_g16_noa16_c_d_2d:
; GFX10GISEL: ; %bb.0: ; %main_body
; GFX10GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX10GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX10GISEL-NEXT: v_lshl_or_b32 v1, v2, 16, v1
; GFX10GISEL-NEXT: v_lshl_or_b32 v2, v4, 16, v3
; GFX10GISEL-NEXT: image_sample_c_d_g16 v[0:3], [v0, v1, v2, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; 1D image.sample.d.cl: two half derivatives, float coordinate and float
; clamp. Expected opcode is image_sample_d_cl_g16 with no a16 modifier.
define amdgpu_ps <4 x float> @sample_g16_noa16_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) {
; GFX10-LABEL: sample_g16_noa16_d_cl_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_d_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_g16_noa16_d_cl_1d:
; GFX10GISEL: ; %bb.0: ; %main_body
; GFX10GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX10GISEL-NEXT: v_lshl_or_b32 v0, s0, 16, v0
; GFX10GISEL-NEXT: v_lshl_or_b32 v1, s0, 16, v1
; GFX10GISEL-NEXT: image_sample_d_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; 2D image.sample.d.cl: four half derivatives, two float coordinates and a
; float clamp. Expected opcode is image_sample_d_cl_g16 with no a16 modifier.
define amdgpu_ps <4 x float> @sample_g16_noa16_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
; GFX10-LABEL: sample_g16_noa16_d_cl_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX10-NEXT: image_sample_d_cl_g16 v[0:3], [v0, v2, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_g16_noa16_d_cl_2d:
; GFX10GISEL: ; %bb.0: ; %main_body
; GFX10GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX10GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX10GISEL-NEXT: v_lshl_or_b32 v1, v3, 16, v2
; GFX10GISEL-NEXT: image_sample_d_cl_g16 v[0:3], [v0, v1, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; 1D image.sample.c.d.cl: float zcompare, two half derivatives, float
; coordinate and float clamp. Expected opcode is image_sample_c_d_cl_g16 with
; no a16 modifier.
define amdgpu_ps <4 x float> @sample_g16_noa16_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp) {
; GFX10-LABEL: sample_g16_noa16_c_d_cl_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_c_d_cl_g16 v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_g16_noa16_c_d_cl_1d:
; GFX10GISEL: ; %bb.0: ; %main_body
; GFX10GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX10GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX10GISEL-NEXT: v_lshl_or_b32 v1, s0, 16, v1
; GFX10GISEL-NEXT: v_lshl_or_b32 v2, s0, 16, v2
; GFX10GISEL-NEXT: image_sample_c_d_cl_g16 v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; 2D image.sample.c.d.cl: float zcompare, four half derivatives, two float
; coordinates and a float clamp. Expected opcode is image_sample_c_d_cl_g16
; with no a16 modifier.
define amdgpu_ps <4 x float> @sample_g16_noa16_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
; GFX10-LABEL: sample_g16_noa16_c_d_cl_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_mov_b32_e32 v8, v2
; GFX10-NEXT: v_mov_b32_e32 v2, v0
; GFX10-NEXT: v_perm_b32 v4, v4, v3, 0x5040100
; GFX10-NEXT: v_perm_b32 v3, v8, v1, 0x5040100
; GFX10-NEXT: image_sample_c_d_cl_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_g16_noa16_c_d_cl_2d:
; GFX10GISEL: ; %bb.0: ; %main_body
; GFX10GISEL-NEXT: v_mov_b32_e32 v8, v2
; GFX10GISEL-NEXT: v_mov_b32_e32 v2, v0
; GFX10GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v1
; GFX10GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v3
; GFX10GISEL-NEXT: v_lshl_or_b32 v3, v8, 16, v0
; GFX10GISEL-NEXT: v_lshl_or_b32 v4, v4, 16, v1
; GFX10GISEL-NEXT: image_sample_c_d_cl_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; 1D image.sample.cd: two half derivatives and one float coordinate.
; Expected opcode is image_sample_cd_g16 with no a16 modifier.
define amdgpu_ps <4 x float> @sample_g16_noa16_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {
; GFX10-LABEL: sample_g16_noa16_cd_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_cd_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_g16_noa16_cd_1d:
; GFX10GISEL: ; %bb.0: ; %main_body
; GFX10GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX10GISEL-NEXT: v_lshl_or_b32 v0, s0, 16, v0
; GFX10GISEL-NEXT: v_lshl_or_b32 v1, s0, 16, v1
; GFX10GISEL-NEXT: image_sample_cd_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; 2D image.sample.cd: four half derivatives and two float coordinates.
; Expected opcode is image_sample_cd_g16 with no a16 modifier.
define amdgpu_ps <4 x float> @sample_g16_noa16_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
; GFX10-LABEL: sample_g16_noa16_cd_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX10-NEXT: image_sample_cd_g16 v[0:3], [v0, v2, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_g16_noa16_cd_2d:
; GFX10GISEL: ; %bb.0: ; %main_body
; GFX10GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX10GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX10GISEL-NEXT: v_lshl_or_b32 v1, v3, 16, v2
; GFX10GISEL-NEXT: image_sample_cd_g16 v[0:3], [v0, v1, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; 1D image.sample.c.cd: float zcompare, two half derivatives, one float
; coordinate. Expected opcode is image_sample_c_cd_g16 with no a16 modifier.
define amdgpu_ps <4 x float> @sample_g16_noa16_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) {
; GFX10-LABEL: sample_g16_noa16_c_cd_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_c_cd_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_g16_noa16_c_cd_1d:
; GFX10GISEL: ; %bb.0: ; %main_body
; GFX10GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX10GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX10GISEL-NEXT: v_lshl_or_b32 v1, s0, 16, v1
; GFX10GISEL-NEXT: v_lshl_or_b32 v2, s0, 16, v2
; GFX10GISEL-NEXT: image_sample_c_cd_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; 2D image.sample.c.cd: float zcompare, four half derivatives, two float
; coordinates. Expected opcode is image_sample_c_cd_g16 with no a16 modifier.
define amdgpu_ps <4 x float> @sample_g16_noa16_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
; GFX10-LABEL: sample_g16_noa16_c_cd_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_perm_b32 v3, v4, v3, 0x5040100
; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
; GFX10-NEXT: image_sample_c_cd_g16 v[0:3], [v0, v1, v3, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_g16_noa16_c_cd_2d:
; GFX10GISEL: ; %bb.0: ; %main_body
; GFX10GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX10GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX10GISEL-NEXT: v_lshl_or_b32 v1, v2, 16, v1
; GFX10GISEL-NEXT: v_lshl_or_b32 v2, v4, 16, v3
; GFX10GISEL-NEXT: image_sample_c_cd_g16 v[0:3], [v0, v1, v2, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; 1D image.sample.cd.cl: two half derivatives, float coordinate and float
; clamp. Expected opcode is image_sample_cd_cl_g16 with no a16 modifier.
define amdgpu_ps <4 x float> @sample_g16_noa16_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) {
; GFX10-LABEL: sample_g16_noa16_cd_cl_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_cd_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_g16_noa16_cd_cl_1d:
; GFX10GISEL: ; %bb.0: ; %main_body
; GFX10GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX10GISEL-NEXT: v_lshl_or_b32 v0, s0, 16, v0
; GFX10GISEL-NEXT: v_lshl_or_b32 v1, s0, 16, v1
; GFX10GISEL-NEXT: image_sample_cd_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; 2D image.sample.cd.cl: four half derivatives, two float coordinates and a
; float clamp. Expected opcode is image_sample_cd_cl_g16 with no a16 modifier.
define amdgpu_ps <4 x float> @sample_g16_noa16_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
; GFX10-LABEL: sample_g16_noa16_cd_cl_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX10-NEXT: image_sample_cd_cl_g16 v[0:3], [v0, v2, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_g16_noa16_cd_cl_2d:
; GFX10GISEL: ; %bb.0: ; %main_body
; GFX10GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX10GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX10GISEL-NEXT: v_lshl_or_b32 v1, v3, 16, v2
; GFX10GISEL-NEXT: image_sample_cd_cl_g16 v[0:3], [v0, v1, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; 1D image.sample.c.cd.cl: float zcompare, two half derivatives, float
; coordinate and float clamp. Expected opcode is image_sample_c_cd_cl_g16 with
; no a16 modifier.
define amdgpu_ps <4 x float> @sample_g16_noa16_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp) {
; GFX10-LABEL: sample_g16_noa16_c_cd_cl_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_c_cd_cl_g16 v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_g16_noa16_c_cd_cl_1d:
; GFX10GISEL: ; %bb.0: ; %main_body
; GFX10GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX10GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX10GISEL-NEXT: v_lshl_or_b32 v1, s0, 16, v1
; GFX10GISEL-NEXT: v_lshl_or_b32 v2, s0, 16, v2
; GFX10GISEL-NEXT: image_sample_c_cd_cl_g16 v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; 2D image.sample.c.cd.cl: float zcompare, four half derivatives, two float
; coordinates and a float clamp. Expected opcode is image_sample_c_cd_cl_g16
; with no a16 modifier.
define amdgpu_ps <4 x float> @sample_g16_noa16_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
; GFX10-LABEL: sample_g16_noa16_c_cd_cl_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_mov_b32_e32 v8, v2
; GFX10-NEXT: v_mov_b32_e32 v2, v0
; GFX10-NEXT: v_perm_b32 v4, v4, v3, 0x5040100
; GFX10-NEXT: v_perm_b32 v3, v8, v1, 0x5040100
; GFX10-NEXT: image_sample_c_cd_cl_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_g16_noa16_c_cd_cl_2d:
; GFX10GISEL: ; %bb.0: ; %main_body
; GFX10GISEL-NEXT: v_mov_b32_e32 v8, v2
; GFX10GISEL-NEXT: v_mov_b32_e32 v2, v0
; GFX10GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v1
; GFX10GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v3
; GFX10GISEL-NEXT: v_lshl_or_b32 v3, v8, 16, v0
; GFX10GISEL-NEXT: v_lshl_or_b32 v4, v4, 16, v1
; GFX10GISEL-NEXT: image_sample_c_cd_cl_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; 2D-array image.sample.c.d.o returning a single float (dmask argument 4):
; i32 offset, float zcompare, four half derivatives, float s/t/slice.
; Expected opcode is image_sample_c_d_o_g16 with a one-register result and no
; a16 modifier.
define amdgpu_ps float @sample_g16_noa16_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
; GFX10-LABEL: sample_g16_noa16_c_d_o_2darray_V1:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_mov_b32_e32 v9, v3
; GFX10-NEXT: v_mov_b32_e32 v10, v2
; GFX10-NEXT: v_mov_b32_e32 v3, v1
; GFX10-NEXT: v_mov_b32_e32 v2, v0
; GFX10-NEXT: v_perm_b32 v5, v5, v4, 0x5040100
; GFX10-NEXT: v_perm_b32 v4, v9, v10, 0x5040100
; GFX10-NEXT: image_sample_c_d_o_g16 v0, v[2:8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_g16_noa16_c_d_o_2darray_V1:
; GFX10GISEL: ; %bb.0: ; %main_body
; GFX10GISEL-NEXT: v_mov_b32_e32 v9, v2
; GFX10GISEL-NEXT: v_mov_b32_e32 v10, v3
; GFX10GISEL-NEXT: v_mov_b32_e32 v2, v0
; GFX10GISEL-NEXT: v_mov_b32_e32 v3, v1
; GFX10GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v4
; GFX10GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v9
; GFX10GISEL-NEXT: v_lshl_or_b32 v5, v5, 16, v1
; GFX10GISEL-NEXT: v_lshl_or_b32 v4, v10, 16, v0
; GFX10GISEL-NEXT: image_sample_c_d_o_g16 v0, v[2:8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
main_body:
  %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f32(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret float %v
}
|
|
|
|
; 2D-array image.sample.c.d.o returning <2 x float> (dmask argument 6):
; i32 offset, float zcompare, four half derivatives, float s/t/slice.
; Expected opcode is image_sample_c_d_o_g16 with a two-register result and no
; a16 modifier.
define amdgpu_ps <2 x float> @sample_g16_noa16_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
; GFX10-LABEL: sample_g16_noa16_c_d_o_2darray_V2:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_mov_b32_e32 v9, v3
; GFX10-NEXT: v_mov_b32_e32 v10, v2
; GFX10-NEXT: v_mov_b32_e32 v3, v1
; GFX10-NEXT: v_mov_b32_e32 v2, v0
; GFX10-NEXT: v_perm_b32 v5, v5, v4, 0x5040100
; GFX10-NEXT: v_perm_b32 v4, v9, v10, 0x5040100
; GFX10-NEXT: image_sample_c_d_o_g16 v[0:1], v[2:8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_g16_noa16_c_d_o_2darray_V2:
; GFX10GISEL: ; %bb.0: ; %main_body
; GFX10GISEL-NEXT: v_mov_b32_e32 v9, v2
; GFX10GISEL-NEXT: v_mov_b32_e32 v10, v3
; GFX10GISEL-NEXT: v_mov_b32_e32 v2, v0
; GFX10GISEL-NEXT: v_mov_b32_e32 v3, v1
; GFX10GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v4
; GFX10GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v9
; GFX10GISEL-NEXT: v_lshl_or_b32 v5, v5, 16, v1
; GFX10GISEL-NEXT: v_lshl_or_b32 v4, v10, 16, v0
; GFX10GISEL-NEXT: image_sample_c_d_o_g16 v[0:1], v[2:8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
main_body:
  %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <2 x float> %v
}
|
|
|
|
; Intrinsic declarations for the f16.f32 overloads (half derivatives, float
; coordinates). Grouped as sample.d / sample.cd / sample.c.d.o variants.
declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32, half, half, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
|
|
; 1D image.sample.d with every derivative and the coordinate passed as half
; (f16.f16 overload). Expected opcode is image_sample_d_g16 carrying the a16
; modifier.
define amdgpu_ps <4 x float> @sample_d_1d_g16_a16(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) {
; GFX10-LABEL: sample_d_1d_g16_a16:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_d_1d_g16_a16:
; GFX10GISEL: ; %bb.0: ; %main_body
; GFX10GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX10GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX10GISEL-NEXT: v_lshl_or_b32 v0, s0, 16, v0
; GFX10GISEL-NEXT: v_lshl_or_b32 v1, s0, 16, v1
; GFX10GISEL-NEXT: v_lshl_or_b32 v2, s0, 16, v2
; GFX10GISEL-NEXT: image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; 2D image.sample.d with four half derivatives and two half coordinates
; (f16.f16 overload). Expected opcode is image_sample_d_g16 carrying the a16
; modifier.
define amdgpu_ps <4 x float> @sample_d_2d_g16_a16(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
; GFX10-LABEL: sample_d_2d_g16_a16:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_perm_b32 v4, v5, v4, 0x5040100
; GFX10-NEXT: v_perm_b32 v3, v3, v2, 0x5040100
; GFX10-NEXT: v_perm_b32 v2, v1, v0, 0x5040100
; GFX10-NEXT: image_sample_d_g16 v[0:3], v[2:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_d_2d_g16_a16:
; GFX10GISEL: ; %bb.0: ; %main_body
; GFX10GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX10GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
; GFX10GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX10GISEL-NEXT: v_lshl_or_b32 v1, v3, 16, v2
; GFX10GISEL-NEXT: v_lshl_or_b32 v2, v5, 16, v4
; GFX10GISEL-NEXT: image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; 3D image.sample.d with six half derivatives and three half coordinates
; (f16.f16 overload). Expected opcode is image_sample_d_g16 carrying the a16
; modifier.
define amdgpu_ps <4 x float> @sample_d_3d_g16_a16(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r) {
; GFX10-LABEL: sample_d_3d_g16_a16:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_mov_b32_e32 v12, v8
; GFX10-NEXT: v_mov_b32_e32 v10, v5
; GFX10-NEXT: v_mov_b32_e32 v8, v2
; GFX10-NEXT: v_perm_b32 v11, v7, v6, 0x5040100
; GFX10-NEXT: v_perm_b32 v9, v4, v3, 0x5040100
; GFX10-NEXT: v_perm_b32 v7, v1, v0, 0x5040100
; GFX10-NEXT: image_sample_d_g16 v[0:3], v[7:12], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_d_3d_g16_a16:
; GFX10GISEL: ; %bb.0: ; %main_body
; GFX10GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX10GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v2
; GFX10GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v3
; GFX10GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v5
; GFX10GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v6
; GFX10GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v8
; GFX10GISEL-NEXT: v_lshl_or_b32 v1, s0, 16, v1
; GFX10GISEL-NEXT: v_lshl_or_b32 v2, v4, 16, v2
; GFX10GISEL-NEXT: v_lshl_or_b32 v3, s0, 16, v3
; GFX10GISEL-NEXT: v_lshl_or_b32 v4, v7, 16, v5
; GFX10GISEL-NEXT: v_lshl_or_b32 v5, s0, 16, v6
; GFX10GISEL-NEXT: image_sample_d_g16 v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; Intrinsic declarations for the f16.f16 overloads (half derivatives and half
; coordinates); these carry no attribute group in this file.
declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32)
declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32)
declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32)
|
|
|
|
; Attribute groups. #1 is the group attached to the f16.f32 intrinsic
; declarations in this file.
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
attributes #2 = { nounwind readnone }
|