
Add inreg test for sgpr purpose. This is the second PR after https://github.com/llvm/llvm-project/pull/135729. It tests SGPR inputs and outputs by using inreg cases for bit-conversions. Co-authored-by: Matt Arsenault <Matthew.Arsenault@amd.com>
69 lines
2.6 KiB
LLVM
69 lines
2.6 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=SI %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=VI %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s

; Kernel under test: loads a <16 x i8> vector through the addrspace(1) pointer
; %in and stores it unchanged through the addrspace(1) pointer %out. There is
; no explicit bitcast instruction in the body; the pointer arguments are opaque
; `ptr` values, so the vector type is supplied directly by the load and store.
;
; The SI / VI / GFX9 / GFX11 check blocks below pin the expected llc output for
; each check prefix. They are autogenerated (see the NOTE at the top of the
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
define amdgpu_kernel void @bitcast_i8ptr_v16i8ptr(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; SI-LABEL: bitcast_i8ptr_v16i8ptr:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_mov_b32_e32 v0, s4
; SI-NEXT:    v_mov_b32_e32 v1, s5
; SI-NEXT:    v_mov_b32_e32 v2, s6
; SI-NEXT:    v_mov_b32_e32 v3, s7
; SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: bitcast_i8ptr_v16i8ptr:
; VI:       ; %bb.0: ; %entry
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
; VI-NEXT:    v_mov_b32_e32 v4, s0
; VI-NEXT:    v_mov_b32_e32 v5, s1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v0, s4
; VI-NEXT:    v_mov_b32_e32 v1, s5
; VI-NEXT:    v_mov_b32_e32 v2, s6
; VI-NEXT:    v_mov_b32_e32 v3, s7
; VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
; VI-NEXT:    s_endpgm
;
; GFX9-LABEL: bitcast_i8ptr_v16i8ptr:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v4, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s4
; GFX9-NEXT:    v_mov_b32_e32 v1, s5
; GFX9-NEXT:    v_mov_b32_e32 v2, s6
; GFX9-NEXT:    v_mov_b32_e32 v3, s7
; GFX9-NEXT:    global_store_dwordx4 v4, v[0:3], s[0:1]
; GFX9-NEXT:    s_endpgm
;
; GFX11-LABEL: bitcast_i8ptr_v16i8ptr:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_load_b128 s[4:7], s[2:3], 0x0
; GFX11-NEXT:    v_mov_b32_e32 v4, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v3, s7
; GFX11-NEXT:    v_dual_mov_b32 v1, s5 :: v_dual_mov_b32 v2, s6
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[0:1]
; GFX11-NEXT:    s_endpgm
entry:
  %0 = load <16 x i8>, ptr addrspace(1) %in
  store <16 x i8> %0, ptr addrspace(1) %out
  ret void
}