llvm-project/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.bvh8_intersect_ray.ll
Mariusz Sikora 4f5ccf22fa
[AMDGPU] Support image_bvh8_intersect_ray instruction and intrinsic. (#130041)
Co-authored-by: Ivan Kosarev <ivan.kosarev@amd.com>
2025-03-19 16:08:08 +01:00

88 lines
5.7 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12-SDAG %s
; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12-GISEL %s
; The two llc invocations above compile this file for gfx1200, once with
; -global-isel=0 and once with -global-isel=1; each is verified under its own
; check prefix, so every function below carries two sets of expected output.
;
; Intrinsic under test. As declared here it takes
; (i64, float, i8, <3 x float>, <3 x float>, i32, <4 x i32>) and returns the
; three-member struct {<10 x i32>, <3 x float>, <3 x float>}. The call sites in
; this file pass, in order: %node_ptr, %ray_extent, an i8 constant, %ray_origin,
; %ray_dir, %offset and %tdescr.
; The i8 operand is presumably the instance mask -- TODO confirm against the
; intrinsic definition in IntrinsicsAMDGPU.td.
declare {<10 x i32>, <3 x float>, <3 x float>} @llvm.amdgcn.image.bvh8.intersect.ray(i64, float, i8, <3 x float>, <3 x float>, i32, <4 x i32>)
; Checks the lowering of llvm.amdgcn.image.bvh8.intersect.ray when the i8
; operand is the constant 0.
;
; The scalar float arguments are assembled into the <3 x float> origin and
; direction vectors, the <10 x i32> member of the result is bitcast to
; <10 x float> and returned, and the two <3 x float> members are stored through
; %origin and %dir.
;
; For both instruction selectors the expected output:
;   * copies v3..v8 into a higher, contiguous register range that is used as
;     the two three-register operands of the image instruction,
;   * moves the constant 0 into v3, the high register of the v[2:3] operand,
;   * issues one image_bvh8_intersect_ray writing v[0:9], with s[0:3] as the
;     trailing operand,
;   * waits with s_wait_bvhcnt 0x0 and then emits two global_store_b96
;     instructions that store those same three-register ranges.
; The two selectors differ only in which registers receive the copies.
define amdgpu_ps <10 x float> @image_bvh8_intersect_ray(i64 %node_ptr, float %ray_extent, float %ray_origin_x, float %ray_origin_y, float %ray_origin_z, float %ray_dir_x, float %ray_dir_y, float %ray_dir_z, i32 %offset, <4 x i32> inreg %tdescr, ptr addrspace(1) %origin, ptr addrspace(1) %dir) {
; GFX12-SDAG-LABEL: image_bvh8_intersect_ray:
; GFX12-SDAG: ; %bb.0: ; %main_body
; GFX12-SDAG-NEXT: v_dual_mov_b32 v21, v8 :: v_dual_mov_b32 v20, v7
; GFX12-SDAG-NEXT: v_dual_mov_b32 v19, v6 :: v_dual_mov_b32 v18, v5
; GFX12-SDAG-NEXT: v_dual_mov_b32 v17, v4 :: v_dual_mov_b32 v16, v3
; GFX12-SDAG-NEXT: v_mov_b32_e32 v3, 0
; GFX12-SDAG-NEXT: image_bvh8_intersect_ray v[0:9], [v[0:1], v[2:3], v[16:18], v[19:21], v9], s[0:3]
; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
; GFX12-SDAG-NEXT: global_store_b96 v[10:11], v[16:18], off
; GFX12-SDAG-NEXT: global_store_b96 v[12:13], v[19:21], off
; GFX12-SDAG-NEXT: ; return to shader part epilog
;
; GFX12-GISEL-LABEL: image_bvh8_intersect_ray:
; GFX12-GISEL: ; %bb.0: ; %main_body
; GFX12-GISEL-NEXT: v_dual_mov_b32 v14, v3 :: v_dual_mov_b32 v15, v4
; GFX12-GISEL-NEXT: v_dual_mov_b32 v16, v5 :: v_dual_mov_b32 v17, v6
; GFX12-GISEL-NEXT: v_dual_mov_b32 v18, v7 :: v_dual_mov_b32 v19, v8
; GFX12-GISEL-NEXT: v_mov_b32_e32 v3, 0
; GFX12-GISEL-NEXT: image_bvh8_intersect_ray v[0:9], [v[0:1], v[2:3], v[14:16], v[17:19], v9], s[0:3]
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
; GFX12-GISEL-NEXT: global_store_b96 v[10:11], v[14:16], off
; GFX12-GISEL-NEXT: global_store_b96 v[12:13], v[17:19], off
; GFX12-GISEL-NEXT: ; return to shader part epilog
main_body:
; Assemble the ray origin vector from its three scalar components.
%ray_origin0 = insertelement <3 x float> poison, float %ray_origin_x, i32 0
%ray_origin1 = insertelement <3 x float> %ray_origin0, float %ray_origin_y, i32 1
%ray_origin = insertelement <3 x float> %ray_origin1, float %ray_origin_z, i32 2
; Assemble the ray direction vector the same way.
%ray_dir0 = insertelement <3 x float> poison, float %ray_dir_x, i32 0
%ray_dir1 = insertelement <3 x float> %ray_dir0, float %ray_dir_y, i32 1
%ray_dir = insertelement <3 x float> %ray_dir1, float %ray_dir_z, i32 2
; Call the intrinsic; the i8 operand is the constant 0 in this variant.
%v = call {<10 x i32>, <3 x float>, <3 x float>} @llvm.amdgcn.image.bvh8.intersect.ray(i64 %node_ptr, float %ray_extent, i8 0, <3 x float> %ray_origin, <3 x float> %ray_dir, i32 %offset, <4 x i32> %tdescr)
; Member 0 becomes the return value, reinterpreted as <10 x float>.
%a = extractvalue {<10 x i32>, <3 x float>, <3 x float>} %v, 0
%r = bitcast <10 x i32> %a to <10 x float>
; Members 1 and 2 are written out through the addrspace(1) pointers so that
; all three struct members are used.
%o = extractvalue {<10 x i32>, <3 x float>, <3 x float>} %v, 1
store <3 x float> %o, ptr addrspace(1) %origin
%d = extractvalue {<10 x i32>, <3 x float>, <3 x float>} %v, 2
store <3 x float> %d, ptr addrspace(1) %dir
ret <10 x float> %r
}
; Checks the lowering of llvm.amdgcn.image.bvh8.intersect.ray when the i8
; operand is the constant 1.
;
; The body is otherwise the same sequence as in @image_bvh8_intersect_ray: the
; scalar float arguments are assembled into the <3 x float> origin and direction
; vectors, the <10 x i32> member of the result is bitcast to <10 x float> and
; returned, and the two <3 x float> members are stored through %origin and %dir.
;
; The expected output for both instruction selectors therefore matches that
; function's except for the immediate: the constant moved into v3 (the high
; register of the v[2:3] operand) is 1 instead of 0.
define amdgpu_ps <10 x float> @image_bvh8_intersect_ray_1(i64 %node_ptr, float %ray_extent, float %ray_origin_x, float %ray_origin_y, float %ray_origin_z, float %ray_dir_x, float %ray_dir_y, float %ray_dir_z, i32 %offset, <4 x i32> inreg %tdescr, ptr addrspace(1) %origin, ptr addrspace(1) %dir) {
; GFX12-SDAG-LABEL: image_bvh8_intersect_ray_1:
; GFX12-SDAG: ; %bb.0: ; %main_body
; GFX12-SDAG-NEXT: v_dual_mov_b32 v21, v8 :: v_dual_mov_b32 v20, v7
; GFX12-SDAG-NEXT: v_dual_mov_b32 v19, v6 :: v_dual_mov_b32 v18, v5
; GFX12-SDAG-NEXT: v_dual_mov_b32 v17, v4 :: v_dual_mov_b32 v16, v3
; GFX12-SDAG-NEXT: v_mov_b32_e32 v3, 1
; GFX12-SDAG-NEXT: image_bvh8_intersect_ray v[0:9], [v[0:1], v[2:3], v[16:18], v[19:21], v9], s[0:3]
; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
; GFX12-SDAG-NEXT: global_store_b96 v[10:11], v[16:18], off
; GFX12-SDAG-NEXT: global_store_b96 v[12:13], v[19:21], off
; GFX12-SDAG-NEXT: ; return to shader part epilog
;
; GFX12-GISEL-LABEL: image_bvh8_intersect_ray_1:
; GFX12-GISEL: ; %bb.0: ; %main_body
; GFX12-GISEL-NEXT: v_dual_mov_b32 v14, v3 :: v_dual_mov_b32 v15, v4
; GFX12-GISEL-NEXT: v_dual_mov_b32 v16, v5 :: v_dual_mov_b32 v17, v6
; GFX12-GISEL-NEXT: v_dual_mov_b32 v18, v7 :: v_dual_mov_b32 v19, v8
; GFX12-GISEL-NEXT: v_mov_b32_e32 v3, 1
; GFX12-GISEL-NEXT: image_bvh8_intersect_ray v[0:9], [v[0:1], v[2:3], v[14:16], v[17:19], v9], s[0:3]
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
; GFX12-GISEL-NEXT: global_store_b96 v[10:11], v[14:16], off
; GFX12-GISEL-NEXT: global_store_b96 v[12:13], v[17:19], off
; GFX12-GISEL-NEXT: ; return to shader part epilog
main_body:
; Assemble the ray origin vector from its three scalar components.
%ray_origin0 = insertelement <3 x float> poison, float %ray_origin_x, i32 0
%ray_origin1 = insertelement <3 x float> %ray_origin0, float %ray_origin_y, i32 1
%ray_origin = insertelement <3 x float> %ray_origin1, float %ray_origin_z, i32 2
; Assemble the ray direction vector the same way.
%ray_dir0 = insertelement <3 x float> poison, float %ray_dir_x, i32 0
%ray_dir1 = insertelement <3 x float> %ray_dir0, float %ray_dir_y, i32 1
%ray_dir = insertelement <3 x float> %ray_dir1, float %ray_dir_z, i32 2
; Call the intrinsic; the i8 operand is the constant 1 in this variant.
%v = call {<10 x i32>, <3 x float>, <3 x float>} @llvm.amdgcn.image.bvh8.intersect.ray(i64 %node_ptr, float %ray_extent, i8 1, <3 x float> %ray_origin, <3 x float> %ray_dir, i32 %offset, <4 x i32> %tdescr)
; Member 0 becomes the return value, reinterpreted as <10 x float>.
%a = extractvalue {<10 x i32>, <3 x float>, <3 x float>} %v, 0
%r = bitcast <10 x i32> %a to <10 x float>
; Members 1 and 2 are written out through the addrspace(1) pointers so that
; all three struct members are used.
%o = extractvalue {<10 x i32>, <3 x float>, <3 x float>} %v, 1
store <3 x float> %o, ptr addrspace(1) %origin
%d = extractvalue {<10 x i32>, <3 x float>, <3 x float>} %v, 2
store <3 x float> %d, ptr addrspace(1) %dir
ret <10 x float> %r
}