Cover all the missing cases and add very detailed tests for each rule. In summary: - Flat and Scratch, addrspace(0) and addrspace(5): loads are always divergent. - Global and Constant, addrspace(1) and addrspace(4): have real uniform loads (s_load), but these require additional checks on the alignment and flags in the MMO. For loads that are not naturally aligned or whose MMO is not uniform, do uniform-in-vgpr lowering. - Local (LDS), addrspace(3): only has instructions for divergent loads; for uniform loads, do uniform-in-vgpr lowering. - Store rules are simplified using Ptr32 and Ptr64; all operands need to be vgpr. Some tests have code-size regressions since they use more sgpr instructions; these are marked with a FixMe comment to get back to later.
2690 lines
100 KiB
LLVM
2690 lines
100 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii < %s | FileCheck -check-prefixes=GCN,GFX7 %s
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10 %s
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s
|
|
|
|
; Dynamic extractelement from a <4 x i8> loaded through an inreg addrspace(4)
; pointer, with an inreg index (amdgpu_ps). The checks read the pointer from
; s[2:3] and the index from s4. On every target the index is masked with 3 and
; used as a byte offset from the pointer, a single unsigned-byte load is issued
; from a VMEM/flat instruction, and the result is copied to s0 with
; v_readfirstlane_b32.
define amdgpu_ps i8 @extractelement_sgpr_v4i8_sgpr_idx(ptr addrspace(4) inreg %ptr, i32 inreg %idx) {
; GFX9-LABEL: extractelement_sgpr_v4i8_sgpr_idx:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_and_b32 s0, s4, 3
; GFX9-NEXT: s_ashr_i32 s1, s0, 31
; GFX9-NEXT: s_add_u32 s0, s2, s0
; GFX9-NEXT: s_addc_u32 s1, s3, s1
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1]
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: extractelement_sgpr_v4i8_sgpr_idx:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_and_b32 s0, s4, 3
; GFX8-NEXT: s_ashr_i32 s1, s0, 31
; GFX8-NEXT: s_add_u32 s0, s2, s0
; GFX8-NEXT: s_addc_u32 s1, s3, s1
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_readfirstlane_b32 s0, v0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: extractelement_sgpr_v4i8_sgpr_idx:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_and_b32 s4, s4, 3
; GFX7-NEXT: s_ashr_i32 s5, s4, 31
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: s_mov_b32 s0, s2
; GFX7-NEXT: s_mov_b32 s1, s3
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: v_mov_b32_e32 v1, s5
; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[0:3], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_readfirstlane_b32 s0, v0
; GFX7-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: extractelement_sgpr_v4i8_sgpr_idx:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_and_b32 s0, s4, 3
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: s_ashr_i32 s1, s0, 31
; GFX10-NEXT: s_add_u32 s0, s2, s0
; GFX10-NEXT: s_addc_u32 s1, s3, s1
; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_readfirstlane_b32 s0, v0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: extractelement_sgpr_v4i8_sgpr_idx:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_and_b32 s0, s4, 3
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: s_ashr_i32 s1, s0, 31
; GFX11-NEXT: s_add_u32 s0, s2, s0
; GFX11-NEXT: s_addc_u32 s1, s3, s1
; GFX11-NEXT: global_load_u8 v0, v0, s[0:1]
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-NEXT: ; return to shader part epilog
  %vector = load <4 x i8>, ptr addrspace(4) %ptr
  %element = extractelement <4 x i8> %vector, i32 %idx
  ret i8 %element
}
|
|
|
|
; Dynamic extractelement from a <4 x i8> loaded through an addrspace(1)
; pointer that is not inreg (the checks read it from v[0:1]), with an inreg
; index read from s2 (amdgpu_ps). The index is masked with 3 and applied as a
; byte offset to the pointer; one unsigned-byte load is expected, and the
; result is copied to s0 with v_readfirstlane_b32 for the shader return.
define amdgpu_ps i8 @extractelement_vgpr_v4i8_sgpr_idx(ptr addrspace(1) %ptr, i32 inreg %idx) {
; GFX9-LABEL: extractelement_vgpr_v4i8_sgpr_idx:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_and_b32 s0, s2, 3
; GFX9-NEXT: s_ashr_i32 s1, s0, 31
; GFX9-NEXT: v_mov_b32_e32 v3, s1
; GFX9-NEXT: v_mov_b32_e32 v2, s0
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: extractelement_vgpr_v4i8_sgpr_idx:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_and_b32 s0, s2, 3
; GFX8-NEXT: s_ashr_i32 s1, s0, 31
; GFX8-NEXT: v_mov_b32_e32 v3, s1
; GFX8-NEXT: v_mov_b32_e32 v2, s0
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_readfirstlane_b32 s0, v0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: extractelement_vgpr_v4i8_sgpr_idx:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_and_b32 s0, s2, 3
; GFX7-NEXT: s_ashr_i32 s1, s0, 31
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[0:3], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_readfirstlane_b32 s0, v0
; GFX7-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: extractelement_vgpr_v4i8_sgpr_idx:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_and_b32 s0, s2, 3
; GFX10-NEXT: s_ashr_i32 s1, s0, 31
; GFX10-NEXT: v_mov_b32_e32 v3, s1
; GFX10-NEXT: v_mov_b32_e32 v2, s0
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_readfirstlane_b32 s0, v0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: extractelement_vgpr_v4i8_sgpr_idx:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_and_b32 s0, s2, 3
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: s_ashr_i32 s1, s0, 31
; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
; GFX11-NEXT: global_load_u8 v0, v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-NEXT: ; return to shader part epilog
  %vector = load <4 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <4 x i8> %vector, i32 %idx
  ret i8 %element
}
|
|
|
|
; Dynamic extractelement from a <4 x i8> loaded through an addrspace(1)
; pointer, with neither argument inreg and the default calling convention.
; The checks read the pointer from v[0:1] and the index from v2, mask the
; index with 3, add it to the pointer with VALU adds, issue one unsigned-byte
; load into v0, and return with s_setpc_b64 (no v_readfirstlane is expected).
define i8 @extractelement_vgpr_v4i8_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx) {
; GFX9-LABEL: extractelement_vgpr_v4i8_vgpr_idx:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_and_b32_e32 v2, 3, v2
; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v4i8_vgpr_idx:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_and_b32_e32 v2, 3, v2
; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v4i8_vgpr_idx:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_and_b32_e32 v2, 3, v2
; GFX7-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v4i8_vgpr_idx:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_and_b32_e32 v2, 3, v2
; GFX10-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v4i8_vgpr_idx:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_and_b32_e32 v2, 3, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
; GFX11-NEXT: global_load_u8 v0, v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %vector = load <4 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <4 x i8> %vector, i32 %idx
  ret i8 %element
}
|
|
|
|
; Dynamic extractelement from a <4 x i8> loaded through an inreg addrspace(4)
; pointer (read from s[2:3] in the checks) with a non-inreg index (read from
; v0), amdgpu_ps. The index is masked with 3 in a VGPR. On the gfx9, gfx8,
; gfx10 and gfx11 runs the pointer is copied into VGPRs and added to the
; index; on the gfx7 run the pointer becomes the buffer base and the index the
; addr64 offset. One unsigned-byte load is expected, then v_readfirstlane_b32
; moves the result to s0.
define amdgpu_ps i8 @extractelement_sgpr_v4i8_vgpr_idx(ptr addrspace(4) inreg %ptr, i32 %idx) {
; GFX9-LABEL: extractelement_sgpr_v4i8_vgpr_idx:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_and_b32_e32 v2, 3, v0
; GFX9-NEXT: v_mov_b32_e32 v0, s2
; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX9-NEXT: v_mov_b32_e32 v1, s3
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: extractelement_sgpr_v4i8_vgpr_idx:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_and_b32_e32 v2, 3, v0
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_readfirstlane_b32 s0, v0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: extractelement_sgpr_v4i8_vgpr_idx:
; GFX7: ; %bb.0:
; GFX7-NEXT: v_and_b32_e32 v0, 3, v0
; GFX7-NEXT: s_mov_b32 s0, s2
; GFX7-NEXT: s_mov_b32 s1, s3
; GFX7-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[0:3], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_readfirstlane_b32 s0, v0
; GFX7-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: extractelement_sgpr_v4i8_vgpr_idx:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_and_b32_e32 v2, 3, v0
; GFX10-NEXT: v_mov_b32_e32 v0, s2
; GFX10-NEXT: v_mov_b32_e32 v1, s3
; GFX10-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_readfirstlane_b32 s0, v0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: extractelement_sgpr_v4i8_vgpr_idx:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_and_b32_e32 v2, 3, v0
; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
; GFX11-NEXT: global_load_u8 v0, v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-NEXT: ; return to shader part epilog
  %vector = load <4 x i8>, ptr addrspace(4) %ptr
  %element = extractelement <4 x i8> %vector, i32 %idx
  ret i8 %element
}
|
|
|
|
; Constant-index (element 0) extract from a <4 x i8> loaded through an inreg
; addrspace(4) pointer (read from s[2:3] in the checks), amdgpu_ps. Each run
; expects a single unsigned-byte load directly at the pointer, with no offset,
; followed by v_readfirstlane_b32 into s0.
define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx0(ptr addrspace(4) inreg %ptr) {
; GFX9-LABEL: extractelement_sgpr_v4i8_idx0:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: global_load_ubyte v0, v0, s[2:3]
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: extractelement_sgpr_v4i8_idx0:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_readfirstlane_b32 s0, v0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: extractelement_sgpr_v4i8_idx0:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_mov_b32 s0, s2
; GFX7-NEXT: s_mov_b32 s1, s3
; GFX7-NEXT: s_mov_b32 s2, -1
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: buffer_load_ubyte v0, off, s[0:3], 0
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_readfirstlane_b32 s0, v0
; GFX7-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: extractelement_sgpr_v4i8_idx0:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: global_load_ubyte v0, v0, s[2:3]
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_readfirstlane_b32 s0, v0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: extractelement_sgpr_v4i8_idx0:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: global_load_u8 v0, v0, s[2:3]
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-NEXT: ; return to shader part epilog
  %vector = load <4 x i8>, ptr addrspace(4) %ptr
  %element = extractelement <4 x i8> %vector, i32 0
  ret i8 %element
}
|
|
|
|
; Constant-index (element 1) extract from a <4 x i8> loaded through an inreg
; addrspace(4) pointer (read from s[2:3] in the checks), amdgpu_ps. Each run
; expects a single unsigned-byte load one byte past the pointer: as an
; immediate offset:1 on the gfx9, gfx7, gfx10 and gfx11 runs, and as a scalar
; 64-bit add of 1 before the flat load on the gfx8 run. The result is moved to
; s0 with v_readfirstlane_b32.
define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx1(ptr addrspace(4) inreg %ptr) {
; GFX9-LABEL: extractelement_sgpr_v4i8_idx1:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: global_load_ubyte v0, v0, s[2:3] offset:1
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: extractelement_sgpr_v4i8_idx1:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_add_u32 s0, s2, 1
; GFX8-NEXT: s_addc_u32 s1, s3, 0
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_readfirstlane_b32 s0, v0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: extractelement_sgpr_v4i8_idx1:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_mov_b32 s0, s2
; GFX7-NEXT: s_mov_b32 s1, s3
; GFX7-NEXT: s_mov_b32 s2, -1
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 offset:1
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_readfirstlane_b32 s0, v0
; GFX7-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: extractelement_sgpr_v4i8_idx1:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: global_load_ubyte v0, v0, s[2:3] offset:1
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_readfirstlane_b32 s0, v0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: extractelement_sgpr_v4i8_idx1:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: global_load_u8 v0, v0, s[2:3] offset:1
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-NEXT: ; return to shader part epilog
  %vector = load <4 x i8>, ptr addrspace(4) %ptr
  %element = extractelement <4 x i8> %vector, i32 1
  ret i8 %element
}
|
|
|
|
; Constant-index (element 2) extract from a <4 x i8> loaded through an inreg
; addrspace(4) pointer (read from s[2:3] in the checks), amdgpu_ps. Each run
; expects a single unsigned-byte load two bytes past the pointer: as an
; immediate offset:2 on the gfx9, gfx7, gfx10 and gfx11 runs, and as a scalar
; 64-bit add of 2 before the flat load on the gfx8 run. The result is moved to
; s0 with v_readfirstlane_b32.
define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx2(ptr addrspace(4) inreg %ptr) {
; GFX9-LABEL: extractelement_sgpr_v4i8_idx2:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: global_load_ubyte v0, v0, s[2:3] offset:2
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: extractelement_sgpr_v4i8_idx2:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_add_u32 s0, s2, 2
; GFX8-NEXT: s_addc_u32 s1, s3, 0
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_readfirstlane_b32 s0, v0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: extractelement_sgpr_v4i8_idx2:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_mov_b32 s0, s2
; GFX7-NEXT: s_mov_b32 s1, s3
; GFX7-NEXT: s_mov_b32 s2, -1
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 offset:2
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_readfirstlane_b32 s0, v0
; GFX7-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: extractelement_sgpr_v4i8_idx2:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: global_load_ubyte v0, v0, s[2:3] offset:2
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_readfirstlane_b32 s0, v0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: extractelement_sgpr_v4i8_idx2:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: global_load_u8 v0, v0, s[2:3] offset:2
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-NEXT: ; return to shader part epilog
  %vector = load <4 x i8>, ptr addrspace(4) %ptr
  %element = extractelement <4 x i8> %vector, i32 2
  ret i8 %element
}
|
|
|
|
; Constant-index (element 3) extract from a <4 x i8> loaded through an inreg
; addrspace(4) pointer (read from s[2:3] in the checks), amdgpu_ps. Each run
; expects a single unsigned-byte load three bytes past the pointer: as an
; immediate offset:3 on the gfx9, gfx7, gfx10 and gfx11 runs, and as a scalar
; 64-bit add of 3 before the flat load on the gfx8 run. The result is moved to
; s0 with v_readfirstlane_b32.
define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx3(ptr addrspace(4) inreg %ptr) {
; GFX9-LABEL: extractelement_sgpr_v4i8_idx3:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: global_load_ubyte v0, v0, s[2:3] offset:3
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: extractelement_sgpr_v4i8_idx3:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_add_u32 s0, s2, 3
; GFX8-NEXT: s_addc_u32 s1, s3, 0
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_readfirstlane_b32 s0, v0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: extractelement_sgpr_v4i8_idx3:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_mov_b32 s0, s2
; GFX7-NEXT: s_mov_b32 s1, s3
; GFX7-NEXT: s_mov_b32 s2, -1
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 offset:3
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_readfirstlane_b32 s0, v0
; GFX7-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: extractelement_sgpr_v4i8_idx3:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: global_load_ubyte v0, v0, s[2:3] offset:3
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_readfirstlane_b32 s0, v0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: extractelement_sgpr_v4i8_idx3:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: global_load_u8 v0, v0, s[2:3] offset:3
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-NEXT: ; return to shader part epilog
  %vector = load <4 x i8>, ptr addrspace(4) %ptr
  %element = extractelement <4 x i8> %vector, i32 3
  ret i8 %element
}
|
|
|
|
; Constant-index (element 0) extract from a <4 x i8> loaded through a
; non-inreg addrspace(1) pointer (read from v[0:1] in the checks), default
; calling convention. Each run expects a single unsigned-byte load directly at
; the pointer into v0 and a return via s_setpc_b64.
define i8 @extractelement_vgpr_v4i8_idx0(ptr addrspace(1) %ptr) {
; GFX9-LABEL: extractelement_vgpr_v4i8_idx0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v4i8_idx0:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v4i8_idx0:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v4i8_idx0:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v4i8_idx0:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: global_load_u8 v0, v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %vector = load <4 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <4 x i8> %vector, i32 0
  ret i8 %element
}
|
|
|
|
; Constant-index (element 1) extract from a <4 x i8> loaded through a
; non-inreg addrspace(1) pointer (read from v[0:1] in the checks), default
; calling convention. Each run expects a single unsigned-byte load one byte
; past the pointer: as an immediate offset:1 on the gfx9, gfx7, gfx10 and gfx11
; runs, and as a 64-bit VALU add of 1 before the flat load on the gfx8 run.
; Returns via s_setpc_b64 with the result in v0.
define i8 @extractelement_vgpr_v4i8_idx1(ptr addrspace(1) %ptr) {
; GFX9-LABEL: extractelement_vgpr_v4i8_idx1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:1
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v4i8_idx1:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 1, v0
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v4i8_idx1:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:1
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v4i8_idx1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:1
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v4i8_idx1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:1
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %vector = load <4 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <4 x i8> %vector, i32 1
  ret i8 %element
}
|
|
|
|
; Constant-index (element 2) extract from a <4 x i8> loaded through a
; non-inreg addrspace(1) pointer (read from v[0:1] in the checks), default
; calling convention. Each run expects a single unsigned-byte load two bytes
; past the pointer: as an immediate offset:2 on the gfx9, gfx7, gfx10 and gfx11
; runs, and as a 64-bit VALU add of 2 before the flat load on the gfx8 run.
; Returns via s_setpc_b64 with the result in v0.
define i8 @extractelement_vgpr_v4i8_idx2(ptr addrspace(1) %ptr) {
; GFX9-LABEL: extractelement_vgpr_v4i8_idx2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:2
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v4i8_idx2:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 2, v0
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v4i8_idx2:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:2
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v4i8_idx2:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v4i8_idx2:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:2
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %vector = load <4 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <4 x i8> %vector, i32 2
  ret i8 %element
}
|
|
|
|
; Constant-index (element 3) extract from a <4 x i8> loaded through a
; non-inreg addrspace(1) pointer (read from v[0:1] in the checks), default
; calling convention. Each run expects a single unsigned-byte load three bytes
; past the pointer: as an immediate offset:3 on the gfx9, gfx7, gfx10 and gfx11
; runs, and as a 64-bit VALU add of 3 before the flat load on the gfx8 run.
; Returns via s_setpc_b64 with the result in v0.
define i8 @extractelement_vgpr_v4i8_idx3(ptr addrspace(1) %ptr) {
; GFX9-LABEL: extractelement_vgpr_v4i8_idx3:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:3
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v4i8_idx3:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 3, v0
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v4i8_idx3:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:3
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v4i8_idx3:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:3
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v4i8_idx3:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:3
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %vector = load <4 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <4 x i8> %vector, i32 3
  ret i8 %element
}
|
|
|
|
; Dynamic extractelement from an <8 x i8> loaded through an inreg addrspace(4)
; pointer, with an inreg index (amdgpu_ps). The checks read the pointer from
; s[2:3] and the index from s4. On every target the index is masked with 7 and
; used as a byte offset from the pointer, a single unsigned-byte load is
; issued, and the result is copied to s0 with v_readfirstlane_b32.
define amdgpu_ps i8 @extractelement_sgpr_v8i8_sgpr_idx(ptr addrspace(4) inreg %ptr, i32 inreg %idx) {
; GFX9-LABEL: extractelement_sgpr_v8i8_sgpr_idx:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_and_b32 s0, s4, 7
; GFX9-NEXT: s_ashr_i32 s1, s0, 31
; GFX9-NEXT: s_add_u32 s0, s2, s0
; GFX9-NEXT: s_addc_u32 s1, s3, s1
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1]
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: extractelement_sgpr_v8i8_sgpr_idx:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_and_b32 s0, s4, 7
; GFX8-NEXT: s_ashr_i32 s1, s0, 31
; GFX8-NEXT: s_add_u32 s0, s2, s0
; GFX8-NEXT: s_addc_u32 s1, s3, s1
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_readfirstlane_b32 s0, v0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: extractelement_sgpr_v8i8_sgpr_idx:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_and_b32 s4, s4, 7
; GFX7-NEXT: s_ashr_i32 s5, s4, 31
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: s_mov_b32 s0, s2
; GFX7-NEXT: s_mov_b32 s1, s3
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: v_mov_b32_e32 v1, s5
; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[0:3], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_readfirstlane_b32 s0, v0
; GFX7-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: extractelement_sgpr_v8i8_sgpr_idx:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_and_b32 s0, s4, 7
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: s_ashr_i32 s1, s0, 31
; GFX10-NEXT: s_add_u32 s0, s2, s0
; GFX10-NEXT: s_addc_u32 s1, s3, s1
; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_readfirstlane_b32 s0, v0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: extractelement_sgpr_v8i8_sgpr_idx:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_and_b32 s0, s4, 7
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: s_ashr_i32 s1, s0, 31
; GFX11-NEXT: s_add_u32 s0, s2, s0
; GFX11-NEXT: s_addc_u32 s1, s3, s1
; GFX11-NEXT: global_load_u8 v0, v0, s[0:1]
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-NEXT: ; return to shader part epilog
  %vector = load <8 x i8>, ptr addrspace(4) %ptr
  %element = extractelement <8 x i8> %vector, i32 %idx
  ret i8 %element
}
|
|
|
|
; Dynamic extractelement from an <8 x i8> loaded through an addrspace(1)
; pointer that is not inreg (the checks read it from v[0:1]), with an inreg
; index read from s2 (amdgpu_ps). The index is masked with 7 and applied as a
; byte offset to the pointer; one unsigned-byte load is expected, and the
; result is copied to s0 with v_readfirstlane_b32 for the shader return.
define amdgpu_ps i8 @extractelement_vgpr_v8i8_sgpr_idx(ptr addrspace(1) %ptr, i32 inreg %idx) {
; GFX9-LABEL: extractelement_vgpr_v8i8_sgpr_idx:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_and_b32 s0, s2, 7
; GFX9-NEXT: s_ashr_i32 s1, s0, 31
; GFX9-NEXT: v_mov_b32_e32 v3, s1
; GFX9-NEXT: v_mov_b32_e32 v2, s0
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: extractelement_vgpr_v8i8_sgpr_idx:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_and_b32 s0, s2, 7
; GFX8-NEXT: s_ashr_i32 s1, s0, 31
; GFX8-NEXT: v_mov_b32_e32 v3, s1
; GFX8-NEXT: v_mov_b32_e32 v2, s0
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_readfirstlane_b32 s0, v0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: extractelement_vgpr_v8i8_sgpr_idx:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_and_b32 s0, s2, 7
; GFX7-NEXT: s_ashr_i32 s1, s0, 31
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[0:3], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_readfirstlane_b32 s0, v0
; GFX7-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: extractelement_vgpr_v8i8_sgpr_idx:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_and_b32 s0, s2, 7
; GFX10-NEXT: s_ashr_i32 s1, s0, 31
; GFX10-NEXT: v_mov_b32_e32 v3, s1
; GFX10-NEXT: v_mov_b32_e32 v2, s0
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_readfirstlane_b32 s0, v0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: extractelement_vgpr_v8i8_sgpr_idx:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_and_b32 s0, s2, 7
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: s_ashr_i32 s1, s0, 31
; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
; GFX11-NEXT: global_load_u8 v0, v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-NEXT: ; return to shader part epilog
  %vector = load <8 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <8 x i8> %vector, i32 %idx
  ret i8 %element
}
|
|
|
|
; Dynamic-index extract from an <8 x i8> loaded through a global (addrspace(1))
; pointer; neither argument is marked inreg. On every target the checks expect
; the index to be masked with 7, sign-extended into a second register and added
; to the pointer with vector ALU instructions, followed by one unsigned byte
; load instead of a load of the whole vector.
define i8 @extractelement_vgpr_v8i8_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx) {
; GFX9-LABEL: extractelement_vgpr_v8i8_vgpr_idx:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_and_b32_e32 v2, 7, v2
; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v8i8_vgpr_idx:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_and_b32_e32 v2, 7, v2
; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v8i8_vgpr_idx:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_and_b32_e32 v2, 7, v2
; GFX7-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v8i8_vgpr_idx:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_and_b32_e32 v2, 7, v2
; GFX10-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v8i8_vgpr_idx:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_and_b32_e32 v2, 7, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
; GFX11-NEXT: global_load_u8 v0, v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %vector = load <8 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <8 x i8> %vector, i32 %idx
  ret i8 %element
}
|
|
|
|
; Dynamic-index extract from an <8 x i8> loaded through a constant
; (addrspace(4)) pointer that is passed inreg, while the index is not inreg.
; The checks expect the index to be masked with 7 in a vector register, the
; address to be formed from s2/s3 plus that index, one unsigned byte load, and
; the loaded byte to be moved to s0 with v_readfirstlane_b32 for the return.
define amdgpu_ps i8 @extractelement_sgpr_v8i8_vgpr_idx(ptr addrspace(4) inreg %ptr, i32 %idx) {
; GFX9-LABEL: extractelement_sgpr_v8i8_vgpr_idx:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_and_b32_e32 v2, 7, v0
; GFX9-NEXT: v_mov_b32_e32 v0, s2
; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX9-NEXT: v_mov_b32_e32 v1, s3
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: extractelement_sgpr_v8i8_vgpr_idx:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_and_b32_e32 v2, 7, v0
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_readfirstlane_b32 s0, v0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: extractelement_sgpr_v8i8_vgpr_idx:
; GFX7: ; %bb.0:
; GFX7-NEXT: v_and_b32_e32 v0, 7, v0
; GFX7-NEXT: s_mov_b32 s0, s2
; GFX7-NEXT: s_mov_b32 s1, s3
; GFX7-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[0:3], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_readfirstlane_b32 s0, v0
; GFX7-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: extractelement_sgpr_v8i8_vgpr_idx:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_and_b32_e32 v2, 7, v0
; GFX10-NEXT: v_mov_b32_e32 v0, s2
; GFX10-NEXT: v_mov_b32_e32 v1, s3
; GFX10-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_readfirstlane_b32 s0, v0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: extractelement_sgpr_v8i8_vgpr_idx:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_and_b32_e32 v2, 7, v0
; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
; GFX11-NEXT: global_load_u8 v0, v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-NEXT: ; return to shader part epilog
  %vector = load <8 x i8>, ptr addrspace(4) %ptr
  %element = extractelement <8 x i8> %vector, i32 %idx
  ret i8 %element
}
|
|
|
|
; Constant-index (element 0) extract from an <8 x i8> loaded through an inreg
; constant (addrspace(4)) pointer. The checks expect a single unsigned byte
; load from the address held in s[2:3] with no offset, and the result moved to
; s0 with v_readfirstlane_b32.
define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx0(ptr addrspace(4) inreg %ptr) {
; GFX9-LABEL: extractelement_sgpr_v8i8_idx0:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: global_load_ubyte v0, v0, s[2:3]
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: extractelement_sgpr_v8i8_idx0:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_readfirstlane_b32 s0, v0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: extractelement_sgpr_v8i8_idx0:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_mov_b32 s0, s2
; GFX7-NEXT: s_mov_b32 s1, s3
; GFX7-NEXT: s_mov_b32 s2, -1
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: buffer_load_ubyte v0, off, s[0:3], 0
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_readfirstlane_b32 s0, v0
; GFX7-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: extractelement_sgpr_v8i8_idx0:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: global_load_ubyte v0, v0, s[2:3]
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_readfirstlane_b32 s0, v0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: extractelement_sgpr_v8i8_idx0:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: global_load_u8 v0, v0, s[2:3]
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-NEXT: ; return to shader part epilog
  %vector = load <8 x i8>, ptr addrspace(4) %ptr
  %element = extractelement <8 x i8> %vector, i32 0
  ret i8 %element
}
|
|
|
|
; Constant-index (element 1) extract from an <8 x i8> loaded through an inreg
; constant (addrspace(4)) pointer. The checks expect a single unsigned byte
; load at byte offset 1 from s[2:3]; in the GFX8 checks the offset is added
; with scalar adds before a flat load, the other targets use offset:1. The
; result is moved to s0 with v_readfirstlane_b32.
define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx1(ptr addrspace(4) inreg %ptr) {
; GFX9-LABEL: extractelement_sgpr_v8i8_idx1:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: global_load_ubyte v0, v0, s[2:3] offset:1
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: extractelement_sgpr_v8i8_idx1:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_add_u32 s0, s2, 1
; GFX8-NEXT: s_addc_u32 s1, s3, 0
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_readfirstlane_b32 s0, v0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: extractelement_sgpr_v8i8_idx1:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_mov_b32 s0, s2
; GFX7-NEXT: s_mov_b32 s1, s3
; GFX7-NEXT: s_mov_b32 s2, -1
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 offset:1
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_readfirstlane_b32 s0, v0
; GFX7-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: extractelement_sgpr_v8i8_idx1:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: global_load_ubyte v0, v0, s[2:3] offset:1
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_readfirstlane_b32 s0, v0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: extractelement_sgpr_v8i8_idx1:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: global_load_u8 v0, v0, s[2:3] offset:1
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-NEXT: ; return to shader part epilog
  %vector = load <8 x i8>, ptr addrspace(4) %ptr
  %element = extractelement <8 x i8> %vector, i32 1
  ret i8 %element
}
|
|
|
|
; Constant-index (element 2) extract from an <8 x i8> loaded through an inreg
; constant (addrspace(4)) pointer. The checks expect a single unsigned byte
; load at byte offset 2 from s[2:3]; in the GFX8 checks the offset is added
; with scalar adds before a flat load, the other targets use offset:2. The
; result is moved to s0 with v_readfirstlane_b32.
define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx2(ptr addrspace(4) inreg %ptr) {
; GFX9-LABEL: extractelement_sgpr_v8i8_idx2:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: global_load_ubyte v0, v0, s[2:3] offset:2
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: extractelement_sgpr_v8i8_idx2:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_add_u32 s0, s2, 2
; GFX8-NEXT: s_addc_u32 s1, s3, 0
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_readfirstlane_b32 s0, v0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: extractelement_sgpr_v8i8_idx2:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_mov_b32 s0, s2
; GFX7-NEXT: s_mov_b32 s1, s3
; GFX7-NEXT: s_mov_b32 s2, -1
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 offset:2
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_readfirstlane_b32 s0, v0
; GFX7-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: extractelement_sgpr_v8i8_idx2:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: global_load_ubyte v0, v0, s[2:3] offset:2
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_readfirstlane_b32 s0, v0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: extractelement_sgpr_v8i8_idx2:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: global_load_u8 v0, v0, s[2:3] offset:2
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-NEXT: ; return to shader part epilog
  %vector = load <8 x i8>, ptr addrspace(4) %ptr
  %element = extractelement <8 x i8> %vector, i32 2
  ret i8 %element
}
|
|
|
|
; Constant-index (element 3) extract from an <8 x i8> loaded through an inreg
; constant (addrspace(4)) pointer. The checks expect a single unsigned byte
; load at byte offset 3 from s[2:3]; in the GFX8 checks the offset is added
; with scalar adds before a flat load, the other targets use offset:3. The
; result is moved to s0 with v_readfirstlane_b32.
define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx3(ptr addrspace(4) inreg %ptr) {
; GFX9-LABEL: extractelement_sgpr_v8i8_idx3:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: global_load_ubyte v0, v0, s[2:3] offset:3
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: extractelement_sgpr_v8i8_idx3:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_add_u32 s0, s2, 3
; GFX8-NEXT: s_addc_u32 s1, s3, 0
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_readfirstlane_b32 s0, v0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: extractelement_sgpr_v8i8_idx3:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_mov_b32 s0, s2
; GFX7-NEXT: s_mov_b32 s1, s3
; GFX7-NEXT: s_mov_b32 s2, -1
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 offset:3
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_readfirstlane_b32 s0, v0
; GFX7-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: extractelement_sgpr_v8i8_idx3:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: global_load_ubyte v0, v0, s[2:3] offset:3
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_readfirstlane_b32 s0, v0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: extractelement_sgpr_v8i8_idx3:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: global_load_u8 v0, v0, s[2:3] offset:3
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-NEXT: ; return to shader part epilog
  %vector = load <8 x i8>, ptr addrspace(4) %ptr
  %element = extractelement <8 x i8> %vector, i32 3
  ret i8 %element
}
|
|
|
|
; Constant-index (element 4) extract from an <8 x i8> loaded through an inreg
; constant (addrspace(4)) pointer. The checks expect a single unsigned byte
; load at byte offset 4 from s[2:3]; in the GFX8 checks the offset is added
; with scalar adds before a flat load, the other targets use offset:4. The
; result is moved to s0 with v_readfirstlane_b32.
define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx4(ptr addrspace(4) inreg %ptr) {
; GFX9-LABEL: extractelement_sgpr_v8i8_idx4:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: global_load_ubyte v0, v0, s[2:3] offset:4
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: extractelement_sgpr_v8i8_idx4:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_add_u32 s0, s2, 4
; GFX8-NEXT: s_addc_u32 s1, s3, 0
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_readfirstlane_b32 s0, v0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: extractelement_sgpr_v8i8_idx4:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_mov_b32 s0, s2
; GFX7-NEXT: s_mov_b32 s1, s3
; GFX7-NEXT: s_mov_b32 s2, -1
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 offset:4
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_readfirstlane_b32 s0, v0
; GFX7-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: extractelement_sgpr_v8i8_idx4:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: global_load_ubyte v0, v0, s[2:3] offset:4
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_readfirstlane_b32 s0, v0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: extractelement_sgpr_v8i8_idx4:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: global_load_u8 v0, v0, s[2:3] offset:4
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-NEXT: ; return to shader part epilog
  %vector = load <8 x i8>, ptr addrspace(4) %ptr
  %element = extractelement <8 x i8> %vector, i32 4
  ret i8 %element
}
|
|
|
|
; Constant-index (element 5) extract from an <8 x i8> loaded through an inreg
; constant (addrspace(4)) pointer. The checks expect a single unsigned byte
; load at byte offset 5 from s[2:3]; in the GFX8 checks the offset is added
; with scalar adds before a flat load, the other targets use offset:5. The
; result is moved to s0 with v_readfirstlane_b32.
define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx5(ptr addrspace(4) inreg %ptr) {
; GFX9-LABEL: extractelement_sgpr_v8i8_idx5:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: global_load_ubyte v0, v0, s[2:3] offset:5
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: extractelement_sgpr_v8i8_idx5:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_add_u32 s0, s2, 5
; GFX8-NEXT: s_addc_u32 s1, s3, 0
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_readfirstlane_b32 s0, v0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: extractelement_sgpr_v8i8_idx5:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_mov_b32 s0, s2
; GFX7-NEXT: s_mov_b32 s1, s3
; GFX7-NEXT: s_mov_b32 s2, -1
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 offset:5
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_readfirstlane_b32 s0, v0
; GFX7-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: extractelement_sgpr_v8i8_idx5:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: global_load_ubyte v0, v0, s[2:3] offset:5
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_readfirstlane_b32 s0, v0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: extractelement_sgpr_v8i8_idx5:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: global_load_u8 v0, v0, s[2:3] offset:5
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-NEXT: ; return to shader part epilog
  %vector = load <8 x i8>, ptr addrspace(4) %ptr
  %element = extractelement <8 x i8> %vector, i32 5
  ret i8 %element
}
|
|
|
|
; Constant-index (element 6) extract from an <8 x i8> loaded through an inreg
; constant (addrspace(4)) pointer. The checks expect a single unsigned byte
; load at byte offset 6 from s[2:3]; in the GFX8 checks the offset is added
; with scalar adds before a flat load, the other targets use offset:6. The
; result is moved to s0 with v_readfirstlane_b32.
define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx6(ptr addrspace(4) inreg %ptr) {
; GFX9-LABEL: extractelement_sgpr_v8i8_idx6:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: global_load_ubyte v0, v0, s[2:3] offset:6
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: extractelement_sgpr_v8i8_idx6:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_add_u32 s0, s2, 6
; GFX8-NEXT: s_addc_u32 s1, s3, 0
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_readfirstlane_b32 s0, v0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: extractelement_sgpr_v8i8_idx6:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_mov_b32 s0, s2
; GFX7-NEXT: s_mov_b32 s1, s3
; GFX7-NEXT: s_mov_b32 s2, -1
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 offset:6
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_readfirstlane_b32 s0, v0
; GFX7-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: extractelement_sgpr_v8i8_idx6:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: global_load_ubyte v0, v0, s[2:3] offset:6
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_readfirstlane_b32 s0, v0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: extractelement_sgpr_v8i8_idx6:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: global_load_u8 v0, v0, s[2:3] offset:6
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-NEXT: ; return to shader part epilog
  %vector = load <8 x i8>, ptr addrspace(4) %ptr
  %element = extractelement <8 x i8> %vector, i32 6
  ret i8 %element
}
|
|
|
|
; Constant-index (element 7, the last lane) extract from an <8 x i8> loaded
; through an inreg constant (addrspace(4)) pointer. The checks expect a single
; unsigned byte load at byte offset 7 from s[2:3]; in the GFX8 checks the
; offset is added with scalar adds before a flat load, the other targets use
; offset:7. The result is moved to s0 with v_readfirstlane_b32.
define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx7(ptr addrspace(4) inreg %ptr) {
; GFX9-LABEL: extractelement_sgpr_v8i8_idx7:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: global_load_ubyte v0, v0, s[2:3] offset:7
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: extractelement_sgpr_v8i8_idx7:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_add_u32 s0, s2, 7
; GFX8-NEXT: s_addc_u32 s1, s3, 0
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_readfirstlane_b32 s0, v0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: extractelement_sgpr_v8i8_idx7:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_mov_b32 s0, s2
; GFX7-NEXT: s_mov_b32 s1, s3
; GFX7-NEXT: s_mov_b32 s2, -1
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 offset:7
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_readfirstlane_b32 s0, v0
; GFX7-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: extractelement_sgpr_v8i8_idx7:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: global_load_ubyte v0, v0, s[2:3] offset:7
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_readfirstlane_b32 s0, v0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: extractelement_sgpr_v8i8_idx7:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: global_load_u8 v0, v0, s[2:3] offset:7
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-NEXT: ; return to shader part epilog
  %vector = load <8 x i8>, ptr addrspace(4) %ptr
  %element = extractelement <8 x i8> %vector, i32 7
  ret i8 %element
}
|
|
|
|
; Constant-index (element 0) extract from an <8 x i8> loaded through a global
; (addrspace(1)) pointer that is not marked inreg. The checks expect a single
; unsigned byte load from the address in v[0:1] with no offset, with the result
; left in v0 for the return.
define i8 @extractelement_vgpr_v8i8_idx0(ptr addrspace(1) %ptr) {
; GFX9-LABEL: extractelement_vgpr_v8i8_idx0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v8i8_idx0:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v8i8_idx0:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v8i8_idx0:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v8i8_idx0:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: global_load_u8 v0, v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %vector = load <8 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <8 x i8> %vector, i32 0
  ret i8 %element
}
|
|
|
|
; Constant-index (element 1) extract from an <8 x i8> loaded through a global
; (addrspace(1)) pointer that is not marked inreg. The checks expect a single
; unsigned byte load at byte offset 1 from v[0:1]; in the GFX8 checks the
; offset is added with vector adds before a flat load, the other targets use
; offset:1.
define i8 @extractelement_vgpr_v8i8_idx1(ptr addrspace(1) %ptr) {
; GFX9-LABEL: extractelement_vgpr_v8i8_idx1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:1
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v8i8_idx1:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 1, v0
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v8i8_idx1:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:1
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v8i8_idx1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:1
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v8i8_idx1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:1
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %vector = load <8 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <8 x i8> %vector, i32 1
  ret i8 %element
}
|
|
|
|
; Constant-index (element 2) extract from an <8 x i8> loaded through a global
; (addrspace(1)) pointer that is not marked inreg. The checks expect a single
; unsigned byte load at byte offset 2 from v[0:1]; in the GFX8 checks the
; offset is added with vector adds before a flat load, the other targets use
; offset:2.
define i8 @extractelement_vgpr_v8i8_idx2(ptr addrspace(1) %ptr) {
; GFX9-LABEL: extractelement_vgpr_v8i8_idx2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:2
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v8i8_idx2:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 2, v0
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v8i8_idx2:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:2
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v8i8_idx2:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v8i8_idx2:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:2
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %vector = load <8 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <8 x i8> %vector, i32 2
  ret i8 %element
}
|
|
|
|
; Constant-index (element 3) extract from an <8 x i8> loaded through a global
; (addrspace(1)) pointer that is not marked inreg. The checks expect a single
; unsigned byte load at byte offset 3 from v[0:1]; in the GFX8 checks the
; offset is added with vector adds before a flat load, the other targets use
; offset:3.
define i8 @extractelement_vgpr_v8i8_idx3(ptr addrspace(1) %ptr) {
; GFX9-LABEL: extractelement_vgpr_v8i8_idx3:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:3
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v8i8_idx3:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 3, v0
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v8i8_idx3:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:3
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v8i8_idx3:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:3
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v8i8_idx3:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:3
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %vector = load <8 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <8 x i8> %vector, i32 3
  ret i8 %element
}
|
|
|
|
; Constant-index (element 4) extract from an <8 x i8> loaded through a global
; (addrspace(1)) pointer that is not marked inreg. The checks expect a single
; unsigned byte load at byte offset 4 from v[0:1]; in the GFX8 checks the
; offset is added with vector adds before a flat load, the other targets use
; offset:4.
define i8 @extractelement_vgpr_v8i8_idx4(ptr addrspace(1) %ptr) {
; GFX9-LABEL: extractelement_vgpr_v8i8_idx4:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v8i8_idx4:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 4, v0
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v8i8_idx4:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:4
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v8i8_idx4:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:4
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v8i8_idx4:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:4
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %vector = load <8 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <8 x i8> %vector, i32 4
  ret i8 %element
}
|
|
|
|
; Constant-index (element 5) extract from an <8 x i8> loaded through a global
; (addrspace(1)) pointer that is not marked inreg. The checks expect a single
; unsigned byte load at byte offset 5 from v[0:1]; in the GFX8 checks the
; offset is added with vector adds before a flat load, the other targets use
; offset:5.
define i8 @extractelement_vgpr_v8i8_idx5(ptr addrspace(1) %ptr) {
; GFX9-LABEL: extractelement_vgpr_v8i8_idx5:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:5
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v8i8_idx5:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 5, v0
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v8i8_idx5:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:5
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v8i8_idx5:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:5
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v8i8_idx5:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:5
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %vector = load <8 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <8 x i8> %vector, i32 5
  ret i8 %element
}
|
|
|
|
; Constant index 6 into a <8 x i8> loaded from a global pointer held in VGPRs.
; Every target is expected to emit a single unsigned-byte load of byte 6; the
; fiji (GFX8) checks show 6 being added to the pointer before a flat load that
; has no offset operand.
define i8 @extractelement_vgpr_v8i8_idx6(ptr addrspace(1) %ptr) {
; GFX9-LABEL: extractelement_vgpr_v8i8_idx6:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:6
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v8i8_idx6:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 6, v0
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v8i8_idx6:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:6
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v8i8_idx6:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:6
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v8i8_idx6:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:6
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %vector = load <8 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <8 x i8> %vector, i32 6
  ret i8 %element
}

; Constant index 7 (the last lane) into a <8 x i8> loaded from a global pointer
; held in VGPRs. Every target is expected to emit a single unsigned-byte load
; of byte 7; the fiji (GFX8) checks show 7 being added to the pointer before a
; flat load that has no offset operand.
define i8 @extractelement_vgpr_v8i8_idx7(ptr addrspace(1) %ptr) {
; GFX9-LABEL: extractelement_vgpr_v8i8_idx7:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:7
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v8i8_idx7:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 7, v0
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v8i8_idx7:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:7
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v8i8_idx7:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:7
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v8i8_idx7:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:7
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %vector = load <8 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <8 x i8> %vector, i32 7
  ret i8 %element
}

; Shader entry point with both the constant-address-space pointer and the index
; passed inreg (SGPRs). The checks expect the index to be masked with 15 and
; sign-extended on the scalar unit, the element address to be formed from the
; pointer plus that index, a one-byte vector-memory load to be issued, and the
; loaded value to be copied back to s0 with v_readfirstlane for the return.
define amdgpu_ps i8 @extractelement_sgpr_v16i8_sgpr_idx(ptr addrspace(4) inreg %ptr, i32 inreg %idx) {
; GFX9-LABEL: extractelement_sgpr_v16i8_sgpr_idx:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_and_b32 s0, s4, 15
; GFX9-NEXT: s_ashr_i32 s1, s0, 31
; GFX9-NEXT: s_add_u32 s0, s2, s0
; GFX9-NEXT: s_addc_u32 s1, s3, s1
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1]
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: extractelement_sgpr_v16i8_sgpr_idx:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_and_b32 s0, s4, 15
; GFX8-NEXT: s_ashr_i32 s1, s0, 31
; GFX8-NEXT: s_add_u32 s0, s2, s0
; GFX8-NEXT: s_addc_u32 s1, s3, s1
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_readfirstlane_b32 s0, v0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: extractelement_sgpr_v16i8_sgpr_idx:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_and_b32 s4, s4, 15
; GFX7-NEXT: s_ashr_i32 s5, s4, 31
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: s_mov_b32 s0, s2
; GFX7-NEXT: s_mov_b32 s1, s3
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: v_mov_b32_e32 v1, s5
; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[0:3], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_readfirstlane_b32 s0, v0
; GFX7-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: extractelement_sgpr_v16i8_sgpr_idx:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_and_b32 s0, s4, 15
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: s_ashr_i32 s1, s0, 31
; GFX10-NEXT: s_add_u32 s0, s2, s0
; GFX10-NEXT: s_addc_u32 s1, s3, s1
; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_readfirstlane_b32 s0, v0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: extractelement_sgpr_v16i8_sgpr_idx:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_and_b32 s0, s4, 15
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: s_ashr_i32 s1, s0, 31
; GFX11-NEXT: s_add_u32 s0, s2, s0
; GFX11-NEXT: s_addc_u32 s1, s3, s1
; GFX11-NEXT: global_load_u8 v0, v0, s[0:1]
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-NEXT: ; return to shader part epilog
  %vector = load <16 x i8>, ptr addrspace(4) %ptr
  %element = extractelement <16 x i8> %vector, i32 %idx
  ret i8 %element
}

; Shader entry point with a global pointer in VGPRs and an inreg (SGPR) index.
; The checks expect the index to be masked with 15 and sign-extended on the
; scalar unit, then combined with the VGPR pointer (via a 64-bit VALU add, or
; on hawaii by placing it in s[0:1] of the buffer resource) ahead of a one-byte
; load whose result is returned in s0 through v_readfirstlane.
define amdgpu_ps i8 @extractelement_vgpr_v16i8_sgpr_idx(ptr addrspace(1) %ptr, i32 inreg %idx) {
; GFX9-LABEL: extractelement_vgpr_v16i8_sgpr_idx:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_and_b32 s0, s2, 15
; GFX9-NEXT: s_ashr_i32 s1, s0, 31
; GFX9-NEXT: v_mov_b32_e32 v3, s1
; GFX9-NEXT: v_mov_b32_e32 v2, s0
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: extractelement_vgpr_v16i8_sgpr_idx:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_and_b32 s0, s2, 15
; GFX8-NEXT: s_ashr_i32 s1, s0, 31
; GFX8-NEXT: v_mov_b32_e32 v3, s1
; GFX8-NEXT: v_mov_b32_e32 v2, s0
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_readfirstlane_b32 s0, v0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: extractelement_vgpr_v16i8_sgpr_idx:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_and_b32 s0, s2, 15
; GFX7-NEXT: s_ashr_i32 s1, s0, 31
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[0:3], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_readfirstlane_b32 s0, v0
; GFX7-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: extractelement_vgpr_v16i8_sgpr_idx:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_and_b32 s0, s2, 15
; GFX10-NEXT: s_ashr_i32 s1, s0, 31
; GFX10-NEXT: v_mov_b32_e32 v3, s1
; GFX10-NEXT: v_mov_b32_e32 v2, s0
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_readfirstlane_b32 s0, v0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: extractelement_vgpr_v16i8_sgpr_idx:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_and_b32 s0, s2, 15
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: s_ashr_i32 s1, s0, 31
; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
; GFX11-NEXT: global_load_u8 v0, v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-NEXT: ; return to shader part epilog
  %vector = load <16 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <16 x i8> %vector, i32 %idx
  ret i8 %element
}

; Callable function with both the global pointer and the index in VGPRs. The
; checks expect the index to be masked with 15 and sign-extended with VALU
; instructions, added to the pointer as a 64-bit value, and followed by a
; one-byte load whose result stays in v0 for the s_setpc_b64 return.
define i8 @extractelement_vgpr_v16i8_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx) {
; GFX9-LABEL: extractelement_vgpr_v16i8_vgpr_idx:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_and_b32_e32 v2, 15, v2
; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v16i8_vgpr_idx:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_and_b32_e32 v2, 15, v2
; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v16i8_vgpr_idx:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_and_b32_e32 v2, 15, v2
; GFX7-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v16i8_vgpr_idx:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_and_b32_e32 v2, 15, v2
; GFX10-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v16i8_vgpr_idx:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_and_b32_e32 v2, 15, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
; GFX11-NEXT: global_load_u8 v0, v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %vector = load <16 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <16 x i8> %vector, i32 %idx
  ret i8 %element
}

; Shader entry point with an inreg (SGPR) constant-address-space pointer and a
; VGPR index. The checks expect the index to be masked with 15 and sign-extended
; with VALU instructions, the pointer to be copied into VGPRs for a 64-bit add
; (or, on hawaii, used as the base in s[0:1] of the buffer resource), a one-byte
; load, and a v_readfirstlane of the result into s0 for the return.
define amdgpu_ps i8 @extractelement_sgpr_v16i8_vgpr_idx(ptr addrspace(4) inreg %ptr, i32 %idx) {
; GFX9-LABEL: extractelement_sgpr_v16i8_vgpr_idx:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_and_b32_e32 v2, 15, v0
; GFX9-NEXT: v_mov_b32_e32 v0, s2
; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX9-NEXT: v_mov_b32_e32 v1, s3
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: extractelement_sgpr_v16i8_vgpr_idx:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_and_b32_e32 v2, 15, v0
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_readfirstlane_b32 s0, v0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: extractelement_sgpr_v16i8_vgpr_idx:
; GFX7: ; %bb.0:
; GFX7-NEXT: v_and_b32_e32 v0, 15, v0
; GFX7-NEXT: s_mov_b32 s0, s2
; GFX7-NEXT: s_mov_b32 s1, s3
; GFX7-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[0:3], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_readfirstlane_b32 s0, v0
; GFX7-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: extractelement_sgpr_v16i8_vgpr_idx:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_and_b32_e32 v2, 15, v0
; GFX10-NEXT: v_mov_b32_e32 v0, s2
; GFX10-NEXT: v_mov_b32_e32 v1, s3
; GFX10-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_readfirstlane_b32 s0, v0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: extractelement_sgpr_v16i8_vgpr_idx:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_and_b32_e32 v2, 15, v0
; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
; GFX11-NEXT: global_load_u8 v0, v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-NEXT: ; return to shader part epilog
  %vector = load <16 x i8>, ptr addrspace(4) %ptr
  %element = extractelement <16 x i8> %vector, i32 %idx
  ret i8 %element
}

; Constant index 0 into a <16 x i8> loaded from a global pointer held in VGPRs.
; Every target is expected to emit a single unsigned-byte load directly from the
; incoming pointer, with no offset operand and no address arithmetic.
define i8 @extractelement_vgpr_v16i8_idx0(ptr addrspace(1) %ptr) {
; GFX9-LABEL: extractelement_vgpr_v16i8_idx0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v16i8_idx0:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v16i8_idx0:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v16i8_idx0:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v16i8_idx0:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: global_load_u8 v0, v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %vector = load <16 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <16 x i8> %vector, i32 0
  ret i8 %element
}

; Constant index 1 into a <16 x i8> loaded from a global pointer held in VGPRs.
; Every target is expected to emit a single unsigned-byte load of byte 1; the
; fiji (GFX8) checks show 1 being added to the pointer before a flat load that
; has no offset operand.
define i8 @extractelement_vgpr_v16i8_idx1(ptr addrspace(1) %ptr) {
; GFX9-LABEL: extractelement_vgpr_v16i8_idx1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:1
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v16i8_idx1:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 1, v0
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v16i8_idx1:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:1
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v16i8_idx1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:1
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v16i8_idx1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:1
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %vector = load <16 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <16 x i8> %vector, i32 1
  ret i8 %element
}

; Constant index 2 into a <16 x i8> loaded from a global pointer held in VGPRs.
; Every target is expected to emit a single unsigned-byte load of byte 2; the
; fiji (GFX8) checks show 2 being added to the pointer before a flat load that
; has no offset operand.
define i8 @extractelement_vgpr_v16i8_idx2(ptr addrspace(1) %ptr) {
; GFX9-LABEL: extractelement_vgpr_v16i8_idx2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:2
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v16i8_idx2:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 2, v0
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v16i8_idx2:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:2
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v16i8_idx2:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v16i8_idx2:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:2
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %vector = load <16 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <16 x i8> %vector, i32 2
  ret i8 %element
}

; Constant index 3 into a <16 x i8> loaded from a global pointer held in VGPRs.
; Every target is expected to emit a single unsigned-byte load of byte 3; the
; fiji (GFX8) checks show 3 being added to the pointer before a flat load that
; has no offset operand.
define i8 @extractelement_vgpr_v16i8_idx3(ptr addrspace(1) %ptr) {
; GFX9-LABEL: extractelement_vgpr_v16i8_idx3:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:3
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v16i8_idx3:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 3, v0
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v16i8_idx3:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:3
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v16i8_idx3:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:3
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v16i8_idx3:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:3
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %vector = load <16 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <16 x i8> %vector, i32 3
  ret i8 %element
}

; Constant index 4 into a <16 x i8> loaded from a global pointer held in VGPRs.
; Every target is expected to emit a single unsigned-byte load of byte 4; the
; fiji (GFX8) checks show 4 being added to the pointer before a flat load that
; has no offset operand.
define i8 @extractelement_vgpr_v16i8_idx4(ptr addrspace(1) %ptr) {
; GFX9-LABEL: extractelement_vgpr_v16i8_idx4:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v16i8_idx4:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 4, v0
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v16i8_idx4:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:4
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v16i8_idx4:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:4
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v16i8_idx4:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:4
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %vector = load <16 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <16 x i8> %vector, i32 4
  ret i8 %element
}

; Constant index 5 into a <16 x i8> loaded from a global pointer held in VGPRs.
; Every target is expected to emit a single unsigned-byte load of byte 5; the
; fiji (GFX8) checks show 5 being added to the pointer before a flat load that
; has no offset operand.
define i8 @extractelement_vgpr_v16i8_idx5(ptr addrspace(1) %ptr) {
; GFX9-LABEL: extractelement_vgpr_v16i8_idx5:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:5
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v16i8_idx5:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 5, v0
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v16i8_idx5:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:5
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v16i8_idx5:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:5
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v16i8_idx5:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:5
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %vector = load <16 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <16 x i8> %vector, i32 5
  ret i8 %element
}

; Constant index 6 into a <16 x i8> loaded from a global pointer held in VGPRs.
; Every target is expected to emit a single unsigned-byte load of byte 6; the
; fiji (GFX8) checks show 6 being added to the pointer before a flat load that
; has no offset operand.
define i8 @extractelement_vgpr_v16i8_idx6(ptr addrspace(1) %ptr) {
; GFX9-LABEL: extractelement_vgpr_v16i8_idx6:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:6
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v16i8_idx6:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 6, v0
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v16i8_idx6:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:6
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v16i8_idx6:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:6
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v16i8_idx6:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:6
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %vector = load <16 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <16 x i8> %vector, i32 6
  ret i8 %element
}

; Constant index 7 into a <16 x i8> loaded from a global pointer held in VGPRs.
; Every target is expected to emit a single unsigned-byte load of byte 7; the
; fiji (GFX8) checks show 7 being added to the pointer before a flat load that
; has no offset operand.
define i8 @extractelement_vgpr_v16i8_idx7(ptr addrspace(1) %ptr) {
; GFX9-LABEL: extractelement_vgpr_v16i8_idx7:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:7
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v16i8_idx7:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 7, v0
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v16i8_idx7:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:7
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v16i8_idx7:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:7
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v16i8_idx7:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:7
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %vector = load <16 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <16 x i8> %vector, i32 7
  ret i8 %element
}

; Constant index 8 into a <16 x i8> loaded from a global pointer held in VGPRs.
; Every target is expected to emit a single unsigned-byte load of byte 8; the
; fiji (GFX8) checks show 8 being added to the pointer before a flat load that
; has no offset operand.
define i8 @extractelement_vgpr_v16i8_idx8(ptr addrspace(1) %ptr) {
; GFX9-LABEL: extractelement_vgpr_v16i8_idx8:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:8
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v16i8_idx8:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 8, v0
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v16i8_idx8:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:8
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v16i8_idx8:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:8
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v16i8_idx8:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:8
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %vector = load <16 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <16 x i8> %vector, i32 8
  ret i8 %element
}

; Constant index 9 into a <16 x i8> loaded from a global pointer held in VGPRs.
; Every target is expected to emit a single unsigned-byte load of byte 9; the
; fiji (GFX8) checks show 9 being added to the pointer before a flat load that
; has no offset operand.
define i8 @extractelement_vgpr_v16i8_idx9(ptr addrspace(1) %ptr) {
; GFX9-LABEL: extractelement_vgpr_v16i8_idx9:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:9
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v16i8_idx9:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 9, v0
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v16i8_idx9:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b64 s[4:5], 0
; GFX7-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:9
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v16i8_idx9:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:9
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v16i8_idx9:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:9
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %vector = load <16 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <16 x i8> %vector, i32 9
  ret i8 %element
}

; Extract constant element 10 from a <16 x i8> vector loaded from a global
; (addrspace(1)) pointer that arrives in v[0:1]. Every target is expected to
; emit a single unsigned-byte load of byte 10 instead of loading the vector:
;   - gfx900 / gfx1010 / gfx1100: global load with the index as "offset:10".
;   - fiji: 64-bit add of 10 to the address (v_add_u32 + v_addc_u32), then a
;     flat byte load with no immediate offset.
;   - hawaii: addr64 buffer load through the resource built in s[4:7], with
;     the index as "offset:10".
define i8 @extractelement_vgpr_v16i8_idx10(ptr addrspace(1) %ptr) {
; GFX9-LABEL: extractelement_vgpr_v16i8_idx10:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:10
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v16i8_idx10:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 10, v0
; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v16i8_idx10:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:10
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v16i8_idx10:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:10
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v16i8_idx10:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:10
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %vector = load <16 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <16 x i8> %vector, i32 10
  ret i8 %element
}
|
|
|
|
; Extract constant element 11 from a <16 x i8> vector loaded from a global
; (addrspace(1)) pointer that arrives in v[0:1]. Every target is expected to
; emit a single unsigned-byte load of byte 11 instead of loading the vector:
;   - gfx900 / gfx1010 / gfx1100: global load with the index as "offset:11".
;   - fiji: 64-bit add of 11 to the address (v_add_u32 + v_addc_u32), then a
;     flat byte load with no immediate offset.
;   - hawaii: addr64 buffer load through the resource built in s[4:7], with
;     the index as "offset:11".
define i8 @extractelement_vgpr_v16i8_idx11(ptr addrspace(1) %ptr) {
; GFX9-LABEL: extractelement_vgpr_v16i8_idx11:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:11
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v16i8_idx11:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 11, v0
; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v16i8_idx11:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:11
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v16i8_idx11:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:11
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v16i8_idx11:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:11
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %vector = load <16 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <16 x i8> %vector, i32 11
  ret i8 %element
}
|
|
|
|
; Extract constant element 12 from a <16 x i8> vector loaded from a global
; (addrspace(1)) pointer that arrives in v[0:1]. Every target is expected to
; emit a single unsigned-byte load of byte 12 instead of loading the vector:
;   - gfx900 / gfx1010 / gfx1100: global load with the index as "offset:12".
;   - fiji: 64-bit add of 12 to the address (v_add_u32 + v_addc_u32), then a
;     flat byte load with no immediate offset.
;   - hawaii: addr64 buffer load through the resource built in s[4:7], with
;     the index as "offset:12".
define i8 @extractelement_vgpr_v16i8_idx12(ptr addrspace(1) %ptr) {
; GFX9-LABEL: extractelement_vgpr_v16i8_idx12:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:12
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v16i8_idx12:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 12, v0
; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v16i8_idx12:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:12
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v16i8_idx12:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:12
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v16i8_idx12:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:12
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %vector = load <16 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <16 x i8> %vector, i32 12
  ret i8 %element
}
|
|
|
|
; Extract constant element 13 from a <16 x i8> vector loaded from a global
; (addrspace(1)) pointer that arrives in v[0:1]. Every target is expected to
; emit a single unsigned-byte load of byte 13 instead of loading the vector:
;   - gfx900 / gfx1010 / gfx1100: global load with the index as "offset:13".
;   - fiji: 64-bit add of 13 to the address (v_add_u32 + v_addc_u32), then a
;     flat byte load with no immediate offset.
;   - hawaii: addr64 buffer load through the resource built in s[4:7], with
;     the index as "offset:13".
define i8 @extractelement_vgpr_v16i8_idx13(ptr addrspace(1) %ptr) {
; GFX9-LABEL: extractelement_vgpr_v16i8_idx13:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:13
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v16i8_idx13:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 13, v0
; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v16i8_idx13:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:13
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v16i8_idx13:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:13
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v16i8_idx13:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:13
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %vector = load <16 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <16 x i8> %vector, i32 13
  ret i8 %element
}
|
|
|
|
; Extract constant element 14 from a <16 x i8> vector loaded from a global
; (addrspace(1)) pointer that arrives in v[0:1]. Every target is expected to
; emit a single unsigned-byte load of byte 14 instead of loading the vector:
;   - gfx900 / gfx1010 / gfx1100: global load with the index as "offset:14".
;   - fiji: 64-bit add of 14 to the address (v_add_u32 + v_addc_u32), then a
;     flat byte load with no immediate offset.
;   - hawaii: addr64 buffer load through the resource built in s[4:7], with
;     the index as "offset:14".
define i8 @extractelement_vgpr_v16i8_idx14(ptr addrspace(1) %ptr) {
; GFX9-LABEL: extractelement_vgpr_v16i8_idx14:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:14
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v16i8_idx14:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 14, v0
; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v16i8_idx14:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:14
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v16i8_idx14:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:14
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v16i8_idx14:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:14
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %vector = load <16 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <16 x i8> %vector, i32 14
  ret i8 %element
}
|
|
|
|
; Extract constant element 15 (the last lane) from a <16 x i8> vector loaded
; from a global (addrspace(1)) pointer that arrives in v[0:1]. Every target is
; expected to emit a single unsigned-byte load of byte 15 instead of loading
; the vector:
;   - gfx900 / gfx1010 / gfx1100: global load with the index as "offset:15".
;   - fiji: 64-bit add of 15 to the address (v_add_u32 + v_addc_u32), then a
;     flat byte load with no immediate offset.
;   - hawaii: addr64 buffer load through the resource built in s[4:7], with
;     the index as "offset:15".
define i8 @extractelement_vgpr_v16i8_idx15(ptr addrspace(1) %ptr) {
; GFX9-LABEL: extractelement_vgpr_v16i8_idx15:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:15
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v16i8_idx15:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 15, v0
; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v16i8_idx15:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:15
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v16i8_idx15:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:15
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v16i8_idx15:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:15
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %vector = load <16 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <16 x i8> %vector, i32 15
  ret i8 %element
}
|
|
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GCN: {{.*}}