AMDGPU/GlobalISel: RegBankLegalize rules for bswap, cvt_ubyte, rcp (#187093)
This commit is contained in:
parent
29f6bdb65b
commit
abb7288c1e
@ -607,6 +607,21 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
|
||||
.Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}})
|
||||
.Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}});
|
||||
|
||||
addRulesForGOpcs({G_BSWAP}, Standard)
|
||||
.Uni(S16, {{UniInVgprS16}, {Vgpr16}})
|
||||
.Div(S16, {{Vgpr16}, {Vgpr16}})
|
||||
.Uni(S32, {{UniInVgprS32}, {Vgpr32}})
|
||||
.Div(S32, {{Vgpr32}, {Vgpr32}})
|
||||
.Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16}})
|
||||
.Div(V2S16, {{VgprV2S16}, {VgprV2S16}});
|
||||
|
||||
addRulesForGOpcs({G_AMDGPU_CVT_F32_UBYTE0, G_AMDGPU_CVT_F32_UBYTE1,
|
||||
G_AMDGPU_CVT_F32_UBYTE2, G_AMDGPU_CVT_F32_UBYTE3,
|
||||
G_AMDGPU_RCP_IFLAG},
|
||||
Standard)
|
||||
.Uni(S32, {{UniInVgprS32}, {Vgpr32}})
|
||||
.Div(S32, {{Vgpr32}, {Vgpr32}});
|
||||
|
||||
addRulesForGOpcs({G_FRAME_INDEX}).Any({{UniP5, _}, {{SgprP5}, {None}}});
|
||||
|
||||
addRulesForGOpcs({G_UBFX, G_SBFX}, Standard)
|
||||
|
||||
@ -1,9 +1,9 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -o - %s | FileCheck -check-prefix=GFX7 %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji -o - %s | FileCheck -check-prefix=GFX8 %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -o - %s | FileCheck -check-prefix=GFX10 %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -o - %s | FileCheck -check-prefix=GFX10 %s
|
||||
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -o - %s | FileCheck -check-prefix=GFX7 %s
|
||||
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=fiji -o - %s | FileCheck -check-prefix=GFX8 %s
|
||||
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -o - %s | FileCheck -check-prefix=GFX10 %s
|
||||
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -o - %s | FileCheck -check-prefix=GFX10 %s
|
||||
|
||||
define amdgpu_ps i32 @s_bswap_i32(i32 inreg %src) {
|
||||
; GFX7-LABEL: s_bswap_i32:
|
||||
@ -449,15 +449,15 @@ define amdgpu_ps i32 @s_bswap_v2i16(<2 x i16> inreg %src) {
|
||||
; GFX7: ; %bb.0:
|
||||
; GFX7-NEXT: s_lshr_b32 s1, s0, 16
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX7-NEXT: v_alignbit_b32 v0, s1, v0, 24
|
||||
; GFX7-NEXT: s_lshl_b32 s2, s0, 8
|
||||
; GFX7-NEXT: s_bfe_u32 s3, s0, 0x80008
|
||||
; GFX7-NEXT: v_alignbit_b32 v0, s1, v0, 24
|
||||
; GFX7-NEXT: s_or_b32 s2, s3, s2
|
||||
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
||||
; GFX7-NEXT: s_and_b32 s0, 0xffff, s2
|
||||
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
||||
; GFX7-NEXT: v_or_b32_e32 v0, s0, v0
|
||||
; GFX7-NEXT: v_readfirstlane_b32 s0, v0
|
||||
; GFX7-NEXT: s_or_b32 s2, s3, s2
|
||||
; GFX7-NEXT: s_and_b32 s0, 0xffff, s0
|
||||
; GFX7-NEXT: s_and_b32 s1, 0xffff, s2
|
||||
; GFX7-NEXT: s_lshl_b32 s0, s0, 16
|
||||
; GFX7-NEXT: s_or_b32 s0, s1, s0
|
||||
; GFX7-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX8-LABEL: s_bswap_v2i16:
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,SI %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,VI %s
|
||||
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-- -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,SI %s
|
||||
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-- -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,VI %s
|
||||
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
declare i32 @llvm.amdgcn.workitem.id.y() nounwind readnone
|
||||
@ -1434,32 +1434,34 @@ define float @v_test_sitofp_i64_byte_to_f32(i64 %arg0) {
|
||||
; SI-LABEL: v_test_sitofp_i64_byte_to_f32:
|
||||
; SI: ; %bb.0:
|
||||
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; SI-NEXT: v_ffbh_i32_e32 v2, 0
|
||||
; SI-NEXT: v_add_i32_e32 v2, vcc, -1, v2
|
||||
; SI-NEXT: v_and_b32_e32 v0, 0xff, v0
|
||||
; SI-NEXT: v_mov_b32_e32 v1, 0
|
||||
; SI-NEXT: v_min_u32_e32 v2, 32, v2
|
||||
; SI-NEXT: v_lshl_b64 v[0:1], v[0:1], v2
|
||||
; SI-NEXT: v_mov_b32_e32 v1, 0xff
|
||||
; SI-NEXT: v_mov_b32_e32 v2, 0
|
||||
; SI-NEXT: v_and_b32_e32 v1, 0xff, v0
|
||||
; SI-NEXT: v_ffbh_i32_e32 v0, 0
|
||||
; SI-NEXT: v_add_i32_e32 v0, vcc, -1, v0
|
||||
; SI-NEXT: v_min_u32_e32 v3, 32, v0
|
||||
; SI-NEXT: v_lshl_b64 v[0:1], v[1:2], v3
|
||||
; SI-NEXT: v_min_u32_e32 v0, 1, v0
|
||||
; SI-NEXT: v_or_b32_e32 v0, v1, v0
|
||||
; SI-NEXT: v_cvt_f32_i32_e32 v0, v0
|
||||
; SI-NEXT: v_sub_i32_e32 v1, vcc, 32, v2
|
||||
; SI-NEXT: v_sub_i32_e32 v1, vcc, 32, v3
|
||||
; SI-NEXT: v_ldexp_f32_e32 v0, v0, v1
|
||||
; SI-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; VI-LABEL: v_test_sitofp_i64_byte_to_f32:
|
||||
; VI: ; %bb.0:
|
||||
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; VI-NEXT: v_ffbh_i32_e32 v2, 0
|
||||
; VI-NEXT: v_add_u32_e32 v2, vcc, -1, v2
|
||||
; VI-NEXT: v_and_b32_e32 v0, 0xff, v0
|
||||
; VI-NEXT: v_mov_b32_e32 v1, 0
|
||||
; VI-NEXT: v_min_u32_e32 v2, 32, v2
|
||||
; VI-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
|
||||
; VI-NEXT: v_mov_b32_e32 v1, 0xff
|
||||
; VI-NEXT: v_mov_b32_e32 v2, 0
|
||||
; VI-NEXT: v_and_b32_e32 v1, 0xff, v0
|
||||
; VI-NEXT: v_ffbh_i32_e32 v0, 0
|
||||
; VI-NEXT: v_add_u32_e32 v0, vcc, -1, v0
|
||||
; VI-NEXT: v_min_u32_e32 v3, 32, v0
|
||||
; VI-NEXT: v_lshlrev_b64 v[0:1], v3, v[1:2]
|
||||
; VI-NEXT: v_min_u32_e32 v0, 1, v0
|
||||
; VI-NEXT: v_or_b32_e32 v0, v1, v0
|
||||
; VI-NEXT: v_cvt_f32_i32_e32 v0, v0
|
||||
; VI-NEXT: v_sub_u32_e32 v1, vcc, 32, v2
|
||||
; VI-NEXT: v_sub_u32_e32 v1, vcc, 32, v3
|
||||
; VI-NEXT: v_ldexp_f32 v0, v0, v1
|
||||
; VI-NEXT: s_setpc_b64 s[30:31]
|
||||
%masked = and i64 %arg0, 255
|
||||
@ -1471,30 +1473,32 @@ define float @v_test_uitofp_i64_byte_to_f32(i64 %arg0) {
|
||||
; SI-LABEL: v_test_uitofp_i64_byte_to_f32:
|
||||
; SI: ; %bb.0:
|
||||
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; SI-NEXT: v_ffbh_u32_e32 v2, 0
|
||||
; SI-NEXT: v_and_b32_e32 v0, 0xff, v0
|
||||
; SI-NEXT: v_mov_b32_e32 v1, 0
|
||||
; SI-NEXT: v_min_u32_e32 v2, 32, v2
|
||||
; SI-NEXT: v_lshl_b64 v[0:1], v[0:1], v2
|
||||
; SI-NEXT: v_mov_b32_e32 v1, 0xff
|
||||
; SI-NEXT: v_mov_b32_e32 v2, 0
|
||||
; SI-NEXT: v_and_b32_e32 v1, 0xff, v0
|
||||
; SI-NEXT: v_ffbh_u32_e32 v0, 0
|
||||
; SI-NEXT: v_min_u32_e32 v3, 32, v0
|
||||
; SI-NEXT: v_lshl_b64 v[0:1], v[1:2], v3
|
||||
; SI-NEXT: v_min_u32_e32 v0, 1, v0
|
||||
; SI-NEXT: v_or_b32_e32 v0, v1, v0
|
||||
; SI-NEXT: v_cvt_f32_u32_e32 v0, v0
|
||||
; SI-NEXT: v_sub_i32_e32 v1, vcc, 32, v2
|
||||
; SI-NEXT: v_sub_i32_e32 v1, vcc, 32, v3
|
||||
; SI-NEXT: v_ldexp_f32_e32 v0, v0, v1
|
||||
; SI-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; VI-LABEL: v_test_uitofp_i64_byte_to_f32:
|
||||
; VI: ; %bb.0:
|
||||
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; VI-NEXT: v_ffbh_u32_e32 v2, 0
|
||||
; VI-NEXT: v_and_b32_e32 v0, 0xff, v0
|
||||
; VI-NEXT: v_mov_b32_e32 v1, 0
|
||||
; VI-NEXT: v_min_u32_e32 v2, 32, v2
|
||||
; VI-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
|
||||
; VI-NEXT: v_mov_b32_e32 v1, 0xff
|
||||
; VI-NEXT: v_mov_b32_e32 v2, 0
|
||||
; VI-NEXT: v_and_b32_e32 v1, 0xff, v0
|
||||
; VI-NEXT: v_ffbh_u32_e32 v0, 0
|
||||
; VI-NEXT: v_min_u32_e32 v3, 32, v0
|
||||
; VI-NEXT: v_lshlrev_b64 v[0:1], v3, v[1:2]
|
||||
; VI-NEXT: v_min_u32_e32 v0, 1, v0
|
||||
; VI-NEXT: v_or_b32_e32 v0, v1, v0
|
||||
; VI-NEXT: v_cvt_f32_u32_e32 v0, v0
|
||||
; VI-NEXT: v_sub_u32_e32 v1, vcc, 32, v2
|
||||
; VI-NEXT: v_sub_u32_e32 v1, vcc, 32, v3
|
||||
; VI-NEXT: v_ldexp_f32 v0, v0, v1
|
||||
; VI-NEXT: s_setpc_b64 s[30:31]
|
||||
%masked = and i64 %arg0, 255
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user