AMDGPU/GlobalISel: RegBankLegalize rules for ds_add/sub_gs_reg_rtn (#185991)

This commit is contained in:
vangthao95 2026-03-18 09:00:13 -07:00 committed by GitHub
parent 0f5d8a960f
commit f609344120
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 59 additions and 2 deletions

View File

@ -1620,6 +1620,11 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
.Div(S32, {{Vgpr32}, {IntrId, Vgpr32}})
.Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32}});
// Register-bank rules for ds_add_gs_reg_rtn / ds_sub_gs_reg_rtn.
// Both rules cover a divergent result (S32 mapped with Vgpr32, S64 mapped
// with Vgpr64); in each, the one operand listed after the intrinsic ID is
// mapped with Vgpr32.
// NOTE(review): no .Uni(...) rule is registered for these intrinsics, unlike
// the rule set immediately above -- presumably the result is always treated
// as divergent; confirm against the target's divergence tables.
addRulesForIOpcs({amdgcn_ds_add_gs_reg_rtn, amdgcn_ds_sub_gs_reg_rtn},
Standard)
.Div(S32, {{Vgpr32}, {IntrId, Vgpr32}})
.Div(S64, {{Vgpr64}, {IntrId, Vgpr32}});
addRulesForIOpcs(
{amdgcn_ds_bvh_stack_rtn, amdgcn_ds_bvh_stack_push4_pop1_rtn}, Standard)
.Div(S32, {{Vgpr32, Vgpr32}, {IntrId, Vgpr32, Vgpr32, VgprV4S32}});

View File

@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck %s
declare i32 @llvm.amdgcn.ds.add.gs.reg.rtn.i32(i32, i32 immarg)
declare i64 @llvm.amdgcn.ds.add.gs.reg.rtn.i64(i32, i32 immarg)
@ -28,6 +28,19 @@ define amdgpu_gs void @test_add_32_use(i32 %arg, ptr addrspace(1) %out) {
ret void
}
; 32-bit ds.add.gs.reg.rtn with an SGPR input: %arg is marked inreg, and the
; expected assembly below copies it from s0 into a VGPR (v_mov_b32) before the
; DS instruction. The call uses immediate offset 16 and the 32-bit result is
; stored to %out.
define amdgpu_gs void @test_add_32_s(i32 inreg %arg, ptr addrspace(1) %out) {
; CHECK-LABEL: test_add_32_s:
; CHECK: ; %bb.0:
; CHECK-NEXT: v_mov_b32_e32 v2, s0
; CHECK-NEXT: ds_add_gs_reg_rtn v[2:3], v2 offset:16 gds
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: global_store_b32 v[0:1], v2, off
; CHECK-NEXT: s_endpgm
%res = call i32 @llvm.amdgcn.ds.add.gs.reg.rtn.i32(i32 %arg, i32 16)
store i32 %res, ptr addrspace(1) %out, align 4
ret void
}
define amdgpu_gs void @test_add_64(i32 %arg) {
; CHECK-LABEL: test_add_64:
; CHECK: ; %bb.0:
@ -50,3 +63,16 @@ define amdgpu_gs void @test_add_64_use(i32 %arg, ptr addrspace(1) %out) {
store i64 %res, ptr addrspace(1) %out, align 4
ret void
}
; 64-bit ds.add.gs.reg.rtn with an SGPR input: %arg is marked inreg, and the
; expected assembly below copies it from s0 into a VGPR (v_mov_b32) before the
; DS instruction. The call uses immediate offset 32 and the full 64-bit result
; (v[2:3]) is stored to %out.
define amdgpu_gs void @test_add_64_s(i32 inreg %arg, ptr addrspace(1) %out) {
; CHECK-LABEL: test_add_64_s:
; CHECK: ; %bb.0:
; CHECK-NEXT: v_mov_b32_e32 v2, s0
; CHECK-NEXT: ds_add_gs_reg_rtn v[2:3], v2 offset:32 gds
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: global_store_b64 v[0:1], v[2:3], off
; CHECK-NEXT: s_endpgm
%res = call i64 @llvm.amdgcn.ds.add.gs.reg.rtn.i64(i32 %arg, i32 32)
store i64 %res, ptr addrspace(1) %out, align 8
ret void
}

View File

@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck %s
declare i32 @llvm.amdgcn.ds.sub.gs.reg.rtn.i32(i32, i32 immarg)
declare i64 @llvm.amdgcn.ds.sub.gs.reg.rtn.i64(i32, i32 immarg)
@ -28,6 +28,19 @@ define amdgpu_gs void @test_sub_32_use(i32 %arg, ptr addrspace(1) %out) {
ret void
}
; 32-bit ds.sub.gs.reg.rtn with an SGPR input: %arg is marked inreg, and the
; expected assembly below copies it from s0 into a VGPR (v_mov_b32) before the
; DS instruction. The call uses immediate offset 16 and the 32-bit result is
; stored to %out.
define amdgpu_gs void @test_sub_32_s(i32 inreg %arg, ptr addrspace(1) %out) {
; CHECK-LABEL: test_sub_32_s:
; CHECK: ; %bb.0:
; CHECK-NEXT: v_mov_b32_e32 v2, s0
; CHECK-NEXT: ds_sub_gs_reg_rtn v[2:3], v2 offset:16 gds
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: global_store_b32 v[0:1], v2, off
; CHECK-NEXT: s_endpgm
%res = call i32 @llvm.amdgcn.ds.sub.gs.reg.rtn.i32(i32 %arg, i32 16)
store i32 %res, ptr addrspace(1) %out, align 4
ret void
}
define amdgpu_gs void @test_sub_64(i32 %arg) {
; CHECK-LABEL: test_sub_64:
; CHECK: ; %bb.0:
@ -51,3 +64,16 @@ define amdgpu_gs void @test_sub_64_use(i32 %arg, ptr addrspace(1) %out) {
ret void
}
; 64-bit ds.sub.gs.reg.rtn with an SGPR input: %arg is marked inreg, and the
; expected assembly below copies it from s0 into a VGPR (v_mov_b32) before the
; DS instruction. The call uses immediate offset 32 and the full 64-bit result
; (v[2:3]) is stored to %out.
define amdgpu_gs void @test_sub_64_s(i32 inreg %arg, ptr addrspace(1) %out) {
; CHECK-LABEL: test_sub_64_s:
; CHECK: ; %bb.0:
; CHECK-NEXT: v_mov_b32_e32 v2, s0
; CHECK-NEXT: ds_sub_gs_reg_rtn v[2:3], v2 offset:32 gds
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: global_store_b64 v[0:1], v[2:3], off
; CHECK-NEXT: s_endpgm
%res = call i64 @llvm.amdgcn.ds.sub.gs.reg.rtn.i64(i32 %arg, i32 32)
store i64 %res, ptr addrspace(1) %out, align 8
ret void
}