AMDGPU/GlobalISel: RegBankLegalize rules for bswap, cvt_ubyte, rcp (#187093)

This commit is contained in:
vangthao95 2026-03-17 13:20:26 -07:00 committed by GitHub
parent 29f6bdb65b
commit abb7288c1e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 1470 additions and 1208 deletions

View File

@ -607,6 +607,21 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
.Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}})
.Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}});
addRulesForGOpcs({G_BSWAP}, Standard)
.Uni(S16, {{UniInVgprS16}, {Vgpr16}})
.Div(S16, {{Vgpr16}, {Vgpr16}})
.Uni(S32, {{UniInVgprS32}, {Vgpr32}})
.Div(S32, {{Vgpr32}, {Vgpr32}})
.Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16}})
.Div(V2S16, {{VgprV2S16}, {VgprV2S16}});
addRulesForGOpcs({G_AMDGPU_CVT_F32_UBYTE0, G_AMDGPU_CVT_F32_UBYTE1,
G_AMDGPU_CVT_F32_UBYTE2, G_AMDGPU_CVT_F32_UBYTE3,
G_AMDGPU_RCP_IFLAG},
Standard)
.Uni(S32, {{UniInVgprS32}, {Vgpr32}})
.Div(S32, {{Vgpr32}, {Vgpr32}});
addRulesForGOpcs({G_FRAME_INDEX}).Any({{UniP5, _}, {{SgprP5}, {None}}});
addRulesForGOpcs({G_UBFX, G_SBFX}, Standard)

View File

@ -1,9 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -o - %s | FileCheck -check-prefix=GFX7 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji -o - %s | FileCheck -check-prefix=GFX8 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -o - %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -o - %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -o - %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -o - %s | FileCheck -check-prefix=GFX7 %s
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=fiji -o - %s | FileCheck -check-prefix=GFX8 %s
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -o - %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -o - %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -o - %s | FileCheck -check-prefix=GFX10 %s
define amdgpu_ps i32 @s_bswap_i32(i32 inreg %src) {
; GFX7-LABEL: s_bswap_i32:
@ -449,15 +449,15 @@ define amdgpu_ps i32 @s_bswap_v2i16(<2 x i16> inreg %src) {
; GFX7: ; %bb.0:
; GFX7-NEXT: s_lshr_b32 s1, s0, 16
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: v_alignbit_b32 v0, s1, v0, 24
; GFX7-NEXT: s_lshl_b32 s2, s0, 8
; GFX7-NEXT: s_bfe_u32 s3, s0, 0x80008
; GFX7-NEXT: v_alignbit_b32 v0, s1, v0, 24
; GFX7-NEXT: s_or_b32 s2, s3, s2
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7-NEXT: s_and_b32 s0, 0xffff, s2
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX7-NEXT: v_or_b32_e32 v0, s0, v0
; GFX7-NEXT: v_readfirstlane_b32 s0, v0
; GFX7-NEXT: s_or_b32 s2, s3, s2
; GFX7-NEXT: s_and_b32 s0, 0xffff, s0
; GFX7-NEXT: s_and_b32 s1, 0xffff, s2
; GFX7-NEXT: s_lshl_b32 s0, s0, 16
; GFX7-NEXT: s_or_b32 s0, s1, s0
; GFX7-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_bswap_v2i16:

View File

@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,SI %s
; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,VI %s
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-- -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,SI %s
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-- -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,VI %s
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
declare i32 @llvm.amdgcn.workitem.id.y() nounwind readnone
@ -1434,32 +1434,34 @@ define float @v_test_sitofp_i64_byte_to_f32(i64 %arg0) {
; SI-LABEL: v_test_sitofp_i64_byte_to_f32:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_ffbh_i32_e32 v2, 0
; SI-NEXT: v_add_i32_e32 v2, vcc, -1, v2
; SI-NEXT: v_and_b32_e32 v0, 0xff, v0
; SI-NEXT: v_mov_b32_e32 v1, 0
; SI-NEXT: v_min_u32_e32 v2, 32, v2
; SI-NEXT: v_lshl_b64 v[0:1], v[0:1], v2
; SI-NEXT: v_mov_b32_e32 v1, 0xff
; SI-NEXT: v_mov_b32_e32 v2, 0
; SI-NEXT: v_and_b32_e32 v1, 0xff, v0
; SI-NEXT: v_ffbh_i32_e32 v0, 0
; SI-NEXT: v_add_i32_e32 v0, vcc, -1, v0
; SI-NEXT: v_min_u32_e32 v3, 32, v0
; SI-NEXT: v_lshl_b64 v[0:1], v[1:2], v3
; SI-NEXT: v_min_u32_e32 v0, 1, v0
; SI-NEXT: v_or_b32_e32 v0, v1, v0
; SI-NEXT: v_cvt_f32_i32_e32 v0, v0
; SI-NEXT: v_sub_i32_e32 v1, vcc, 32, v2
; SI-NEXT: v_sub_i32_e32 v1, vcc, 32, v3
; SI-NEXT: v_ldexp_f32_e32 v0, v0, v1
; SI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: v_test_sitofp_i64_byte_to_f32:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_ffbh_i32_e32 v2, 0
; VI-NEXT: v_add_u32_e32 v2, vcc, -1, v2
; VI-NEXT: v_and_b32_e32 v0, 0xff, v0
; VI-NEXT: v_mov_b32_e32 v1, 0
; VI-NEXT: v_min_u32_e32 v2, 32, v2
; VI-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
; VI-NEXT: v_mov_b32_e32 v1, 0xff
; VI-NEXT: v_mov_b32_e32 v2, 0
; VI-NEXT: v_and_b32_e32 v1, 0xff, v0
; VI-NEXT: v_ffbh_i32_e32 v0, 0
; VI-NEXT: v_add_u32_e32 v0, vcc, -1, v0
; VI-NEXT: v_min_u32_e32 v3, 32, v0
; VI-NEXT: v_lshlrev_b64 v[0:1], v3, v[1:2]
; VI-NEXT: v_min_u32_e32 v0, 1, v0
; VI-NEXT: v_or_b32_e32 v0, v1, v0
; VI-NEXT: v_cvt_f32_i32_e32 v0, v0
; VI-NEXT: v_sub_u32_e32 v1, vcc, 32, v2
; VI-NEXT: v_sub_u32_e32 v1, vcc, 32, v3
; VI-NEXT: v_ldexp_f32 v0, v0, v1
; VI-NEXT: s_setpc_b64 s[30:31]
%masked = and i64 %arg0, 255
@ -1471,30 +1473,32 @@ define float @v_test_uitofp_i64_byte_to_f32(i64 %arg0) {
; SI-LABEL: v_test_uitofp_i64_byte_to_f32:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_ffbh_u32_e32 v2, 0
; SI-NEXT: v_and_b32_e32 v0, 0xff, v0
; SI-NEXT: v_mov_b32_e32 v1, 0
; SI-NEXT: v_min_u32_e32 v2, 32, v2
; SI-NEXT: v_lshl_b64 v[0:1], v[0:1], v2
; SI-NEXT: v_mov_b32_e32 v1, 0xff
; SI-NEXT: v_mov_b32_e32 v2, 0
; SI-NEXT: v_and_b32_e32 v1, 0xff, v0
; SI-NEXT: v_ffbh_u32_e32 v0, 0
; SI-NEXT: v_min_u32_e32 v3, 32, v0
; SI-NEXT: v_lshl_b64 v[0:1], v[1:2], v3
; SI-NEXT: v_min_u32_e32 v0, 1, v0
; SI-NEXT: v_or_b32_e32 v0, v1, v0
; SI-NEXT: v_cvt_f32_u32_e32 v0, v0
; SI-NEXT: v_sub_i32_e32 v1, vcc, 32, v2
; SI-NEXT: v_sub_i32_e32 v1, vcc, 32, v3
; SI-NEXT: v_ldexp_f32_e32 v0, v0, v1
; SI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: v_test_uitofp_i64_byte_to_f32:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_ffbh_u32_e32 v2, 0
; VI-NEXT: v_and_b32_e32 v0, 0xff, v0
; VI-NEXT: v_mov_b32_e32 v1, 0
; VI-NEXT: v_min_u32_e32 v2, 32, v2
; VI-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
; VI-NEXT: v_mov_b32_e32 v1, 0xff
; VI-NEXT: v_mov_b32_e32 v2, 0
; VI-NEXT: v_and_b32_e32 v1, 0xff, v0
; VI-NEXT: v_ffbh_u32_e32 v0, 0
; VI-NEXT: v_min_u32_e32 v3, 32, v0
; VI-NEXT: v_lshlrev_b64 v[0:1], v3, v[1:2]
; VI-NEXT: v_min_u32_e32 v0, 1, v0
; VI-NEXT: v_or_b32_e32 v0, v1, v0
; VI-NEXT: v_cvt_f32_u32_e32 v0, v0
; VI-NEXT: v_sub_u32_e32 v1, vcc, 32, v2
; VI-NEXT: v_sub_u32_e32 v1, vcc, 32, v3
; VI-NEXT: v_ldexp_f32 v0, v0, v1
; VI-NEXT: s_setpc_b64 s[30:31]
%masked = and i64 %arg0, 255

File diff suppressed because it is too large Load Diff